一、验证码获取
import requests

# Endpoint that returns a fresh captcha image on every request.
CAPTCHA_URL = 'https://user.guancha.cn/main/captcha?'


def downloads_pic(pic_name, timeout=10):
    """Download one captcha image and write it to the local codes folder.

    Args:
        pic_name: File name (without extension) used for the saved ``.jpg``.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection problems, a timeout, or a
            non-2xx HTTP status (so an error page is never saved as an image).
    """
    # stream=True lets the body be written chunk by chunk; the ``with`` block
    # releases the connection even if writing fails.
    with requests.get(CAPTCHA_URL, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(r'E:\User\xuDir\codes\%s.jpg' % (pic_name), 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)


if __name__ == '__main__':
    for i in range(300):
        # An alternative unique name would be int(time.time() * 1000000),
        # i.e. the current timestamp in microseconds.
        pic_name = 'photho' + str(i)
        try:
            downloads_pic(pic_name)
        except requests.RequestException as exc:
            # One failed download should not abort the remaining ones.
            print('failed to download %s: %s' % (pic_name, exc))
请求方式:requests.get()、requests.post()、requests.put()、requests.delete()、requests.head()、requests.options()
head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}
requests.get(url="https://www.baidu.com/s", params={'wd': 'python教程', 'ie': 'utf-8'}, headers=head) 其中 headers 可在浏览器 F12 -> Network -> Doc -> Headers 中查看
1. r.status_code 响应状态码404
2. r.cookies <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
3. r.content 以二进制(bytes)形式返回响应内容,中文会显示为字节序列;bytes 转 str 用 r.content.decode('utf-8')
4. r.text 返回 Unicode 文本;如果 requests 推测的编码不对,中文会显示为乱码,此时应先设置 r.encoding = 'utf-8' 再读取 r.text
二、验证码绘制
import random

from PIL import Image, ImageDraw, ImageFont, ImageFilter

# Lowercase letters, leaving out the easily confused i, l, o and z.
_letter_cases = "abcdefghjkmnpqrstuvwxy"
# Uppercase version of the same alphabet.
_upper_cases = _letter_cases.upper()
# Digits 3-9; 0, 1 and 2 are left out as easily confused.
_numbers = ''.join(map(str, range(3, 10)))
# Pool the captcha characters are drawn from: uppercase letters plus digits.
init_chars = ''.join((_upper_cases, _numbers))


def rndChar():
    """Return one random character from ``init_chars``."""
    return random.choice(init_chars)


def rndColor():
    """Return a random RGB tuple (green >= 10, blue >= 64)."""
    return (random.randint(0, 255), random.randint(10, 255), random.randint(64, 255))


def check_code(width=150, height=40, char_length=5,
               font_file=r'C:\WINDOWS\Fonts\ARLRDBD.TTF', font_size=28):
    """Draw a random captcha image.

    Text is drawn on a white RGB canvas, then noise points, small arcs and
    lines are added, and finally the image is perspective-warped and
    edge-enhanced.

    Args:
        width: Canvas width in pixels before warping.
        height: Canvas height in pixels before warping.
        char_length: Number of characters in the captcha.
        font_file: Path to a TrueType font (SIMYOU.TTF is another option).
        font_size: Font size passed to ``ImageFont.truetype``.

    Returns:
        A tuple ``(img, code)``: the PIL image, sized
        ``(width + 20, height + 5)`` after warping, and the captcha text.
    """
    code = []
    img = Image.new(mode='RGB', size=(width, height), color=(255, 255, 255))
    draw = ImageDraw.Draw(img, mode='RGB')

    # Draw the characters, spaced evenly with a small random vertical jitter.
    font = ImageFont.truetype(font_file, font_size)
    for i in range(char_length):
        char = rndChar()
        code.append(char)
        h = random.randint(0, 4)
        draw.text((7 + i * width / (char_length + 0.7), h), char,
                  font=font, fill=rndColor())

    # Noise points.
    for _ in range(40):
        draw.point([random.randint(0, width), random.randint(0, height)],
                   fill=rndColor())

    # Noise arcs (each iteration also adds one more noise point).
    for _ in range(40):
        draw.point([random.randint(0, width), random.randint(0, height)],
                   fill=rndColor())
        x = random.randint(0, width)
        y = random.randint(0, height)
        draw.arc((x, y, x + 4, y + 4), 0, 90, fill=rndColor())

    # Noise lines.
    for _ in range(5):
        x1 = random.randint(0, width)
        y1 = random.randint(0, height)
        x2 = random.randint(0, width)
        y2 = random.randint(0, height)
        draw.line((x1, y1, x2, y2), fill=rndColor())

    # Eight coefficients for the perspective transform, slightly randomized
    # so every captcha is warped a little differently.
    params = [
        1 - float(random.randint(1, 2)) / 500,
        0,
        0,
        0,
        1 - float(random.randint(1, 10)) / 500,
        float(random.randint(1, 2)) / 500,
        0.001,
        float(random.randint(1, 2)) / 500,
    ]
    # Warp the image, then sharpen the edges.
    img = img.transform((width + 20, height + 5), Image.PERSPECTIVE, params)
    img = img.filter(ImageFilter.EDGE_ENHANCE_MORE)

    return img, ''.join(code)


if __name__ == '__main__':
    for i in range(5):  # generate 5 captcha images
        img, code = check_code()
        img.save(r'E:\User\xuDir\codes\photo%s.jpg' % (str(i)), 'JPEG')
        print(code)
photo0.jpg photo1.jpg
参考:
爬虫验证码https://www.cnblogs.com/dudumiaomiao/p/6601345.html
https://blog.csdn.net/Day_upon/article/details/84189824
验证码绘制https://www.cnblogs.com/nick477931661/p/8810400.html
图片扭曲https://www.cnblogs.com/hello-/articles/9929217.html
2020-03-08 16:53:48