和朋友在QQ上聊天,总感觉没有激情。于是突发奇想:写个小爬虫,把表情包爬取下来随便挑,斗到他们吐血。

下面是爬取斗图表情包的代码,可供参考:
#encoding:utf8
#模块
import re
import requests
from lxml import etree
import os
import random
import threading
import time
import hashlib
def makemake(path, base_dir='F:\\11\\斗图\\'):
    """Create the download folder for ``path`` unless it already exists.

    The folder name is ``path`` with ASCII commas, colons, question marks,
    exclamation marks and spaces removed.

    Args:
        path: Raw folder name; the characters listed above are stripped.
        base_dir: Prefix prepended verbatim to the cleaned name, so it must
            end with a path separator. Defaults to the original hard-coded
            download root.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Raw-string character class: the old pattern '\,|\:|\?|\!' relied on
    # invalid string escapes, which newer Python versions warn about.
    path = re.sub(r'[,:?!]', '', path).replace(' ', '')
    target = base_dir + path

    if os.path.isdir(target):
        print(path+'已经存在')
        return

    print('开始创建'+path)
    # exist_ok keeps this from crashing if the folder appears between the
    # isdir() check above and this call.
    os.makedirs(target, exist_ok=True)
def make_files(path, source, base_dir='F:\\11\\斗图\\', max_attempts=7):
    """Download one file over HTTP and save it below ``base_dir``.

    Args:
        path: Target file path relative to ``base_dir``. ASCII commas,
            colons, question marks, exclamation marks and spaces are
            stripped before use.
        source: Address of the file without the scheme; ``'http://'`` is
            prepended before the request is made.
        base_dir: Prefix prepended verbatim to the cleaned path, so it must
            end with a path separator. Defaults to the original hard-coded
            download root.
        max_attempts: Number of download attempts before giving up. The
            default of 7 matches the original ``n > 6`` give-up check.

    Returns:
        None. Nothing is fetched when the target file already exists, and
        nothing is written when every attempt fails.
    """
    # Raw-string character class replaces the old pattern built from
    # invalid string escapes ('\,|\:|\?|\!').
    path = re.sub(r'[,:?!]', '', path).replace(' ', '')
    target = base_dir + path

    if os.path.isfile(target):
        print(path+'已经存在')
        return

    url = 'http://' + source
    data = None
    # The attempt counter lives in the loop header. The original reset it to
    # 0 on every pass, so the give-up check never fired and a dead link
    # retried forever.
    for attempt in range(1, max_attempts + 1):
        try:
            # A timeout keeps a stalled connection from blocking indefinitely.
            data = requests.get(url, timeout=30).content
            break
        except requests.RequestException:
            print(url+'连接出错正在重试当前次数:'+str(attempt))
            time.sleep(1)

    if data is None:
        # Every attempt failed: bail out instead of writing the URL string
        # into a binary file (which would raise TypeError).
        print('放弃'+url)
        return

    print('正在下载'+path)
    with open(target, 'wb') as out:
        out.write(data)
def start_spider(g):
print('当前下载页数为:' + str(g))
while True:
n = 1
try:
yuan = requests.get('https://www.doutula.com/article/list/?page=' + str(g)).text
break
except:
print('https://www.doutula.com/article/list/?page=' + str(g) + '连接出错正在重试当前次数:' + str(n))
time.sleep(1)
n = n + 1
lists = etree.HTML(yuan).xpath('//*[@>'完成')