【问题标题】:Looping through data from a function to create a table in templates using Django(循环遍历函数返回的数据,以便使用 Django 在模板中创建表格)
【发布时间】:2019-03-18 11:06:48
【问题描述】:

我有一个名为def get_scraped_data():的函数,它本质上是一个网络scraper。它返回一个名为 newlaw 的 DataFrame,其中包含我想要的所有数据。整个代码如下。

def get_scraped_data(pages=1):
    """Scrape solicitor search results into a pandas DataFrame.

    For every results page, the office name and address are read from each
    ``<section class="solicitor">`` element, and every ``/person/`` link on
    the page is followed to read the solicitor's name and role text.

    Args:
        pages: Number of result pages to fetch, starting at page 1.
            Defaults to 1, which matches the previously hard-coded range.

    Returns:
        A DataFrame with the columns ``Solicitor_Name``, ``Office``,
        ``Address``, ``Primary_Role``, ``Secondary_Role``, ``Other_Role``,
        ``Other_Role_1``, ``Other_Role_2``, ``Other_Role_3`` and
        ``Other_Role_4``.

    Raises:
        requests.HTTPError: If a results page or a detail page answers with
            an error status.
        ValueError: Raised by pandas when the four collected lists end up
            with different lengths (offices/addresses come from the results
            page, names/roles from the detail pages, so nothing forces them
            to line up).
    """
    from bs4 import BeautifulSoup
    import requests
    import pandas as pd

    site_root = 'http://solicitors.lawsociety.org.uk'
    search_url = 'http://solicitors.lawsociety.org.uk/search/results?Type=1&IncludeNlsp=True&Pro=True&parameters=%2C1%3BAPL%2C0%3B%2C1%3BPUB%2C0%3B%2C1%3BADV%2C0%3B%2C1%3BAGR%2C0%3B%2C1%3BAVI%2C0%3B%2C1%3BBAN%2C1%3B%2C1%3BBEN%2C0%3B%2C1%3BCHA%2C0%3B%2C1%3BCHI%2C0%3B%2C1%3BCLI%2C0%3B%2C1%3BCOL%2C1%3B%2C1%3BPCO%2C1%3B%2C1%3BCCL%2C0%3B%2C1%3BCOS%2C1%3B%2C1%3BCOM%2C1%3B%2C1%3BCON%2C1%3B%2C1%3BCSU%2C0%3B%2C1%3BCSF%2C0%3B%2C1%3BCSG%2C0%3B%2C1%3BCUT%2C0%3B%2C1%3BCTR%2C1%3B%2C1%3BPRE%2C0%3B%2C1%3BCFI%2C1%3B%2C1%3BCRD%2C0%3B%2C1%3BCRF%2C0%3B%2C1%3BCRG%2C0%3B%2C1%3BCRJ%2C0%3B%2C1%3BCRL%2C0%3B%2C1%3BCRM%2C0%3B%2C1%3BCRS%2C0%3B%2C1%3BCRO%2C1%3B%2C1%3BDEB%2C0%3B%2C1%3BDTR%2C1%3B%2C1%3BDEF%2C0%3B%2C1%3BDRC%2C0%3B%2C1%3BDRO%2C1%3B%2C1%3BEDU%2C0%3B%2C1%3BELC%2C0%3B%2C1%3BELH%2C0%3B%2C1%3BEMP%2C1%3B%2C1%3BENE%2C0%3B%2C1%3BENV%2C0%3B%2C1%3BEUN%2C0%3B%2C1%3BFDS%2C0%3B%2C1%3BFAM%2C0%3B%2C1%3BFAL%2C0%3B%2C1%3BFMC%2C0%3B%2C1%3BFME%2C0%3B%2C1%3BFML%2C0%3B%2C1%3BFPL%2C0%3B%2C1%3BFIS%2C0%3B%2C1%3BHRI%2C0%3B%2C1%3BIMA%2C0%3B%2C1%3BIML%2C0%3B%2C1%3BIMM%2C0%3B%2C1%3BIMG%2C0%3B%2C1%3BIMN%2C0%3B%2C1%3BITE%2C1%3B%2C1%3BINS%2C1%3B%2C1%3BIUR%2C1%3B%2C1%3BIPR%2C1%3B%2C1%3BJRW%2C0%3B%2C1%3BJRL%2C0%3B%2C1%3BLCO%2C1%3B%2C1%3BLRE%2C0%3B%2C1%3BPOA%2C0%3B%2C1%3BLIC%2C1%3B%2C1%3BLIV%2C0%3B%2C1%3BLIS%2C0%3B%2C1%3BLIT%2C0%3B%2C1%3BLPH%2C0%3B%2C1%3BLPP%2C0%3B%2C1%3BMAR%2C0%3B%2C1%3BMED%2C1%3B%2C1%3BMHE%2C0%3B%2C1%3BMHL%2C0%3B%2C1%3BMAA%2C1%3B%2C1%3BMIL%2C0%3B%2C1%3BNDI%2C0%3B%2C1%3BPEN%2C1%3B%2C1%3BPIN%2C0%3B%2C1%3BPIR%2C0%3B%2C1%3BPLA%2C0%3B%2C1%3BPRZ%2C0%3B%2C1%3BPRP%2C0%3B%2C1%3BPRT%2C0%3B%2C1%3BPRW%2C0%3B%2C1%3BPCI%2C0%3B%2C1%3BPCP%2C0%3B%2C1%3BPCT%2C0%3B%2C1%3BPCW%2C0%3B%2C1%3BPNE%2C0%3B%2C1%3BTAX%2C0%3B%2C1%3BTAC%2C1%3B%2C1%3BTAE%2C0%3B%2C1%3BTAH%2C1%3B%2C1%3BTAM%2C0%3B%2C1%3BTAP%2C0%3B%2C1%3BTAT%2C0%3B+'
    # Without a timeout a stalled server would block the caller indefinitely.
    request_timeout = 30

    names = []
    roles = []
    offices = []
    locations = []

    for page in range(1, pages + 1):
        response = requests.get(search_url + '=&Page=' + str(page),
                                timeout=request_timeout)
        response.raise_for_status()
        results_soup = BeautifulSoup(response.content, 'html.parser')

        # Office name and address come straight from the results page.
        for sol in results_soup.find_all('section', {'class': 'solicitor'}):
            try:
                addy = sol.contents[7].find_all('dd', {'class': 'feature highlight'})[0].text
                locations.append(addy)
            except IndexError:
                locations.append('None Found')
            try:
                office_names = sol.contents[7].find_all('dd', {'class': 'highlight'})[0].text
                offices.append(office_names)
            except IndexError:
                offices.append('None Found')

        # Name and role need a visit to each solicitor's own page.  The
        # detail page is parsed into its own variable so the results-page
        # soup being iterated is never rebound mid-loop.
        for link in results_soup.find_all('a', href=True):
            href = link.get('href')
            if not href.startswith('/person/'):
                continue

            detail_response = requests.get(site_root + str(href),
                                           timeout=request_timeout)
            detail_response.raise_for_status()
            detail_soup = BeautifulSoup(detail_response.content, 'html.parser')

            articles = detail_soup.find_all(
                'article',
                {'class': 'solicitor solicitor-type-individual details'})
            for item in articles:
                names.append(item.contents[3].find_all('h1')[0].text)
                try:
                    role = item.find_all('div', {'class': 'panel-half'})[1].find('dd').get_text('')
                    roles.append(role)
                except IndexError:
                    roles.append('Role not specified')

    law = pd.DataFrame({'Solicitor_Name': names,
                        'Role': roles,
                        'Office': offices,
                        'Address': locations},
                       columns=['Solicitor_Name', 'Office', 'Address', 'Role'])

    # str.split(expand=True) only creates as many columns as the longest
    # split needs; reindex guarantees columns 0..3 exist so the lookups
    # below cannot raise KeyError when roles have fewer parts.
    role_parts = (law['Role']
                  .str.split('\n', n=3, expand=True)
                  .reindex(columns=range(4)))
    law['Primary_Role'] = role_parts[0]
    law['Secondary_Role'] = role_parts[1]
    law['Other_Role'] = role_parts[2]
    law['Other_Role_1'] = role_parts[3]
    law['Other_Role_2'] = ''
    law['Other_Role_3'] = ''
    law['Other_Role_4'] = ''
    return law.drop('Role', axis=1)

然后我编辑了 views.py 以调用该函数并将其保存为变量数据 (data = get_scraped_data())。

from django.shortcuts import render
from .tls import get_scraped_data
from django.http import HttpResponse


def law_view(request, *args, **kwargs):
    """Render ``law.html`` with the scraped result exposed as ``data``."""
    context = {'data': get_scraped_data()}
    return render(request, "law.html", context)

继续尝试显示数据,我创建了一个模板“law.html”并插入了此代码。

<table class="table table-striped">
    <thead>
      <tr>
          {# One header cell per DataFrame column label. #}
          {% for column in data.columns %}
        <th>{{ column }}</th>
          {% endfor %}
      </tr>
    </thead>
    <tbody>
        {# Iterating the DataFrame itself only yields column labels; data.values yields one entry per row. #}
        {% for row in data.values %}
        <tr>
            {% for cell in row %}
            <td>{{ cell }}</td>
            {% endfor %}
        </tr>
        {% endfor %}
    </tbody>
</table>

这里的问题是页面只显示了 DataFrame newlaw 的列标题(表头),而没有显示数据行。例如:

律师姓名、办公室、地址等。

我还尝试了 {{ data }},这正是我想要的,但我似乎无法找到一种方法来处理数据并将其放入表格中以使其更美观。

我希望得到一些帮助。

【问题讨论】:

  • 图片里的代码很难看清。
  • 那我应该怎样展示代码?直接打出来吗?
  • @juangonzalez,当然可以!只要不把代码放在图片里就行。
  • 好的,我已经编辑了问题,希望这会更好
  • 视图只是一个 Python 函数,它应该返回一个 HttpResponse 对象。render() 是使用 django 模板的一种方式,但是如果您有另一个生成 HTML 输出的函数(比如 tls.py 中的函数 output()),只需导入它、调用它,并在视图中返回其输出:return HttpResponse(tls.output())。或者,如果您想把输出放进模板,请将输出添加到渲染模板所用的上下文中:return render(request, "tls.html", {'output': tls.output()}),然后在模板中使用 {{ output }} 变量来显示它。

标签: python django pandas


【解决方案1】:

您应该将 tls.py 中的所有代码包装在一个函数中,该函数返回 newlaw(假设这是您想要的输出)。所以:

def get_scraped_data():
    """Run the scraping code and return its result, ``newlaw``."""
    # all your code here
    return newlaw

然后在你的视图(view)中可以这样做:

from tls import get_scraped_data

def law_view(request, *args, **kwargs):
    """Render ``tls.html`` with the scraped result exposed as ``data``."""
    context = {'data': get_scraped_data()}
    return render(request, "tls.html", context)

然后在您的模板 (tls.html) 中,您可以遍历数据:

{# Iterating a DataFrame directly yields only its column labels; data.values yields the rows. #}
{% for row in data.values %}
    {{ row }}
{% endfor %}

当然,您可能希望以更好的方式显示它,例如放在 <table> 中。为此,只需将所有内容包装在 <table> 中,显示标题行,然后遍历各行并将每一行包装在 <tr> 中,再遍历每一列并将其包装在 <td> 中。

我认为DataFrame 也有很多方法可以渲染自己,所以你也可以这样做:

{# DataFrame has no as_html; to_html is the method that renders an HTML table. #}
{{ data.to_html|safe }}

这会将数据呈现为 HTML 表格。

【讨论】:

  • 好的,我接受了你的建议,但由于某种原因,html 页面只呈现了 solicitor_names、office、address 等列的标题......我不知道为什么
  • 我对 DataFrame 了解不多。您使用了 as_html 还是 for 循环?您应该打开一个控制台,检查 data = get_scraped_data() 返回的内容,并动手试一试,弄清楚如何提取您需要的信息。DataFrame 支持的方法不止一百个(one hundred methods),请自行探索。
猜你喜欢
  • 1970-01-01
  • 2016-01-18
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2018-05-10
  • 2012-12-25
  • 2011-11-10
  • 2019-08-26
相关资源
最近更新 更多