**Python爬虫问题求助**

import bs4
from urllib import request
from bs4 import BeautifulSoup

def getHTMLText(url: str, timeout: float = 10.0) -> str:
    """Fetch a page and return its body decoded as UTF-8.

    This is a best-effort fetch: any network, URL, HTTP-protocol or decoding
    failure yields an empty string instead of raising, so callers must treat
    "" as "no page available".

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script forever.

    Returns:
        The decoded page text, or "" if the page could not be fetched or
        is not valid UTF-8.
    """
    # Imported locally so this function does not require a new file-level import.
    from http.client import HTTPException

    try:
        # The context manager closes the response even if read()/decode() fails.
        with request.urlopen(url, timeout=timeout) as resp:
            return resp.read().decode('utf-8')
    except (OSError, ValueError, HTTPException):
        # OSError covers URLError/HTTPError/timeouts; ValueError covers
        # malformed URLs and UnicodeDecodeError.
        return ""

def fillUnivList(ulist, html):
    """Parse the page and append one record per table row to *ulist*.

    The first ``<tbody>`` in *html* is located and each of its direct child
    tags is treated as a row. For every row with at least four ``<td>`` cells,
    the ``.string`` values of cells 1, 2 and 3 are appended to *ulist* as a
    three-element list. A cell's ``.string`` may be None when the cell does
    not contain exactly one text node.

    Args:
        ulist: List that receives the extracted rows; mutated in place.
        html: Page markup. Empty or None input leaves *ulist* unchanged.
    """
    if not html:
        # getHTMLText returns "" on failure; there is nothing to parse.
        return

    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # The page has no table body (layout changed, error page, ...).
        return

    for tr in tbody.children:
        # .children also yields whitespace text nodes; keep real tags only.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')  # calling a tag is shorthand for tr.find_all('td')
        if len(tds) < 4:
            # Header/separator rows lack the columns indexed below.
            continue
        ulist.append([tds[1].string, tds[2].string, tds[3].string])

def printUnivList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Format string notes: the number before each colon is the index of the
    ``format`` argument (0, 1, 2 are the three columns); ``^`` means centered;
    ``{3}`` in front of ``^`` is the fill character, supplied as argument 3;
    ``10`` is the column width. The fill is chr(12288), the full-width
    (ideographic) space, so that padding matches the width of CJK text.

    Args:
        ulist: Sequence of rows, each with at least three cells.
        num: Maximum number of rows to print. Fewer are printed when *ulist*
            is shorter; nothing but the header is printed when num <= 0.
    """
    # A fill character is only valid together with an alignment, hence "^".
    tplt = "{0:10}\t{1:{3}^10}\t{2:^10}"
    fill = chr(12288)
    print(tplt.format("类别", "小说书名/小说章节", "作者", fill))

    # Slicing (rather than indexing up to num) tolerates short lists.
    for row in ulist[:max(num, 0)]:
        # None cannot be formatted with a width spec; print it as empty.
        cells = ["" if cell is None else cell for cell in row[:3]]
        print(tplt.format(cells[0], cells[1], cells[2], fill))

# Script entry point: fetch the listing page, extract its table rows and
# print up to 40 of them. The guard must compare __name__ to '__main__'.
if __name__ == '__main__':
    uinfo = []
    url = 'http://www.yc.ifeng.com/store/0_0_popularity_total_2_1'
    html = getHTMLText(url)
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 40)

运行图为
Python爬虫问题求助
怎么解决???

相关文章: