1 先用select定位到该数据上层的标签,取第一个结果[0],然后用attrs获取单标签里的属性值,或者用get_text()获取成对标签内的文本
2 find用于标签明确且唯一的情况,可以直接使用find(标签名).get_text(),不需要加[0]
import requests
from bs4 import BeautifulSoup
def search(url, timeout=10):
    """Fetch a news listing page and print the details of each article on it.

    The page is downloaded, decoded as GBK, saved to ``a.html`` (UTF-8) in the
    current working directory, and parsed with the ``lxml`` parser. Every
    ``ul li a`` anchor inside the element whose id is
    ``auto-channel-lazyload-article`` is treated as one article entry.
    Entries that lack any of the expected pieces (href, image, title,
    publish time, view count, intro paragraph) are skipped instead of
    aborting the whole run.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        None. Results are written to stdout. If the article container is
        not present in the page, nothing is printed.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    response.encoding = 'gbk'
    text = response.text

    # Keep a local copy of the decoded page for offline inspection.
    with open('a.html', 'wt', encoding='utf-8') as f:
        f.write(text)

    soup = BeautifulSoup(text, 'lxml')
    container = soup.find(id="auto-channel-lazyload-article")
    if container is None:
        # The expected article list is not in this page; nothing to report.
        return

    for anchor in container.select('ul li a'):
        # select_one() returns the first match of a CSS selector and find()
        # returns the first matching tag; both give None (instead of raising)
        # when nothing matches, which lets incomplete entries be skipped.
        # Use attrs for attribute values and get_text() for enclosed text.
        img_tag = anchor.select_one('.article-pic img')
        title_tag = anchor.find('h3')
        time_tag = anchor.select_one('.fn-left')
        views_tag = anchor.select_one('.fn-right em')
        intro_tag = anchor.find('p')

        link = anchor.attrs.get('href')
        img = img_tag.attrs.get('src') if img_tag is not None else None

        required = (link, img, title_tag, time_tag, views_tag, intro_tag)
        if any(piece is None for piece in required):
            continue

        title = title_tag.get_text()
        sub_time = time_tag.get_text()
        views = views_tag.get_text()
        intro = intro_tag.get_text()

        print(link, img, title, sub_time, views, intro)
        # link and img are prefixed with "http:" in the report; presumably
        # the page uses protocol-relative URLs ("//host/...") - TODO confirm.
        print(
            '\n'
            '链接:http:%s\n'
            '图片:http:%s\n'
            '标题:%s\n'
            '发布时间:%s\n'
            '浏览数:%s\n'
            '介绍:%s\n'
            % (link, img, title, sub_time, views, intro)
        )
if __name__ == '__main__':
    # Script entry point: scrape the Autohome news listing page.
    search('https://www.autohome.com.cn/news')