heimu24
#coding:utf-8
#----------------------------------------------------------------------------------------------------------
#                                         功能:爬取汽车之家的新闻
#----------------------------------------------------------------------------------------------------------
# pip3 install requests
# pip3 install BeautifulSoup4

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Fetch a single news title
# response = requests.get('http://www.autohome.com.cn/news/')
# response.encoding = 'gbk'
# soup = BeautifulSoup(response.text, 'html.parser')  # parse into an object
# tag = soup.find(id='auto-channel-lazyload-article')
# h3 = tag.find(name='h3')
# print(h3)

# Find every news item on the page (title, summary, article URL, thumbnail)
# and save each thumbnail into the current directory as 1.jpg, 2.jpg, ...

NEWS_URL = 'http://www.autohome.com.cn/news/'
LIST_CONTAINER_ID = 'auto-channel-lazyload-article'
REQUEST_TIMEOUT = 10    # seconds; requests waits indefinitely without a timeout


def absolute_url(src, base=NEWS_URL):
    """Return *src* as an absolute URL.

    Protocol-relative sources ("//host/path") get an explicit "https:"
    scheme. Sources that already carry a scheme are returned unchanged,
    and relative paths are resolved against *base*.
    """
    if src.startswith('//'):
        return 'https:' + src
    return urljoin(base, src)


def parse_item(li):
    """Extract the fields of one news ``<li>`` tag.

    Returns a dict with the keys ``title``, ``summary``, ``url`` and ``img``,
    or ``None`` when the item has no ``<h3>`` title or no image source (such
    items carry nothing to download). ``summary`` and ``url`` are ``None``
    when the corresponding ``<p>`` / ``<a>`` tag is missing.
    """
    title_tag = li.find('h3')               # news title, looked up by tag name
    if title_tag is None:
        return None

    img_tag = li.find('img')
    src = img_tag.get('src') if img_tag is not None else None
    if not src:
        return None

    summary_tag = li.find('p')              # news summary
    link_tag = li.find('a')                 # first link in the item
    return {
        'title': title_tag.text,
        'summary': summary_tag.text if summary_tag is not None else None,
        'url': link_tag.get('href') if link_tag is not None else None,
        'img': absolute_url(src),
    }


def main():
    """Download the news page and save every item's thumbnail.

    Raises:
        requests.RequestException: if the news page itself cannot be fetched.
        SystemExit: if the news list container is not present in the page.
    """
    response = requests.get(NEWS_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'gbk'               # decode the page body as GBK
    soup = BeautifulSoup(response.text, 'html.parser')

    container = soup.find(id=LIST_CONTAINER_ID)
    if container is None:
        raise SystemExit('element with id %r not found in %s'
                         % (LIST_CONTAINER_ID, NEWS_URL))

    i = 1
    for li in container.find_all(name='li'):
        item = parse_item(li)
        if item is None:
            continue

        # Saving the picture needs a second request for the image itself.
        try:
            res = requests.get(item['img'], timeout=REQUEST_TIMEOUT)
            res.raise_for_status()          # do not store an error page as .jpg
        except requests.RequestException as exc:
            # One broken image should not abort the remaining downloads.
            print('skipping %s: %s' % (item['img'], exc))
            continue

        file_name = "%s.jpg" % (i,)         # files are named 1.jpg, 2.jpg, ...
        i += 1
        with open(file_name, 'wb') as f:
            f.write(res.content)            # res.content is the raw bytes


if __name__ == '__main__':
    main()
View Code

 

分类:

技术点:

相关文章:

  • 2021-06-14
  • 2021-12-05
  • 2021-12-09
  • 2021-12-05
  • 2021-12-05
  • 2021-12-05
  • 2021-06-06
  • 2021-12-05
猜你喜欢
  • 2021-12-05
  • 2021-12-05
  • 2021-12-05
  • 2021-12-05
  • 2021-12-05
  • 2021-12-05
  • 2021-12-15
相关资源
相似解决方案