爬取豆瓣图书 Top250 页面上的封面图片,并以书名作为图片的文件名。
# Author: li
# Download the cover image of every book entry found on the Douban book
# Top 250 page at `url`, saving each one as "<book title>.jpg" in the current
# working directory.
import re

# Send a browser User-Agent so the request is not rejected by anti-scraping checks.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.33 Safari/537.36'}
url = 'https://book.douban.com/top250'

# A single pattern captures both the cover URL (group 1) and the title
# (group 2) of an entry, so each URL stays paired with its own title.
ITEM_PATTERN = re.compile(
    r'<.*?class="item">.*?<.*?src="(.*?)" width=.*?>'
    r'.*?<a href=".*?".*?onclick=.*?title="(.*?)"',
    re.S,  # re.S lets "." match newline characters as well
)

# Characters that cannot appear in a file name on Windows and/or POSIX
# systems, plus ASCII control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def extract_covers(html: str) -> list:
    """Return a list of ``(image_url, title)`` tuples, one per entry in *html*.

    An empty list is returned when nothing matches.
    """
    return ITEM_PATTERN.findall(html)


def safe_filename(title: str) -> str:
    """Turn *title* into a name that is safe to use as a single file name.

    Path separators and other reserved characters are replaced with ``_`` so
    that a title can never point outside the current directory or make
    ``open()`` fail. Leading/trailing spaces and dots are removed. If nothing
    usable is left, ``"untitled"`` is returned.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', title).strip(' .')
    return cleaned or 'untitled'


def download_covers(page_url: str = url, request_headers: dict = headers, timeout: float = 10) -> None:
    """Fetch *page_url* and save every cover image found on it.

    Args:
        page_url: Address of the listing page to scrape.
        request_headers: HTTP headers sent with every request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the listing page answers with an error status.
    """
    # Imported here (not at module level) so the pure helpers above stay
    # importable on machines where the third-party package is not installed.
    import requests

    page = requests.get(page_url, headers=request_headers, timeout=timeout)
    page.raise_for_status()

    for image_url, title in extract_covers(page.text):
        print(title)
        response = requests.get(image_url, headers=request_headers, timeout=timeout)
        if not response.ok:
            # Do not write an HTTP error page to disk as if it were an image.
            print('skipped {}: HTTP {}'.format(image_url, response.status_code))
            continue
        with open('{}.jpg'.format(safe_filename(title)), 'wb') as f:
            f.write(response.content)


if __name__ == '__main__':
    download_covers()
然后加上翻页功能: