ilovelh

爬取豆瓣小说图片,并以标题命名该图片

# Author:li
import  re
import requests
# Browser-like User-Agent so the site does not reject the request as a bot.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.33 Safari/537.36'}
url = 'https://book.douban.com/top250'

# One pattern with two groups: (1) the cover image URL, (2) the book title.
# re.S lets "." cross newlines, because one list item spans several lines.
_ITEM_PATTERN = re.compile(
    r'<.*?class="item">.*?<.*?src="(.*?)" width=.*?>.*?<a href=".*?".*?onclick=.*?title="(.*?)"',
    re.S,
)

# Characters that are invalid in file names on at least one common platform.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def extract_covers(page_html):
    """Return the ``(image_url, title)`` pairs found in *page_html*.

    Args:
        page_html: HTML source of a listing page.

    Returns:
        A list of ``(image_url, title)`` tuples in page order; empty when
        nothing matches.
    """
    return _ITEM_PATTERN.findall(page_html)


def safe_filename(title):
    """Turn a book title into a name that is safe to use as a file name.

    Path separators and other reserved characters are replaced with ``_``
    so a title can neither escape the current directory nor make ``open``
    fail. A title that ends up empty becomes ``'untitled'``.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', title).strip()
    return cleaned or 'untitled'


def download_covers(page_url=url, timeout=10):
    """Download every book cover listed on *page_url*.

    Each image is written to the current directory as ``<title>.jpg``,
    and each title is printed as it is processed.

    Args:
        page_url: Listing page to scrape.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the page or an image returns an error status.
    """
    page = requests.get(page_url, headers=headers, timeout=timeout)
    page.raise_for_status()  # do not parse an error page
    for image_url, title in extract_covers(page.text):
        print(title)
        image = requests.get(image_url, headers=headers, timeout=timeout)
        image.raise_for_status()  # do not save an error body as a .jpg
        with open('{}.jpg'.format(safe_filename(title)), 'wb') as f:
            f.write(image.content)


if __name__ == '__main__':
    download_covers()

后续可以再加上翻页功能。

分类:

技术点:

相关文章: