经测试上一节的代码成功跑通,接下来加上循环爬取所有说说-。-
完整代码:
1 import requests 2 import json 3 import os 4 import shutil 5 import time 6 7 qq = 627911861 8 9 headers = { 10 \'accept\': \'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\', 11 \'accept-encoding\': \'gzip, deflate, br\', 12 \'accept-language\': \'zh-CN,zh;q=0.8\', 13 \'cache-control\': \'max-age=0\', 14 \'cookie\': \'xxxxxx\', 15 \'upgrade-insecure-requests\': \'1\', 16 \'user-agent\': \'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Mobile Safari/537.36\' 17 } 18 19 url_x = \'https://mobile.qzone.qq.com/list?qzonetoken=9d29961d6fbb88be6236636010e0d4fde43a5b77d57ef984938b5aa0cb695e28c258a4d86b8c02a545bbcce970ff&g_tk=1573033187&res_attach=att%3D\' 20 url_y = \'%26tl%3D1508257557&format=json&list_type=shuoshuo&action=0&res_uin=627911861&count=40\' 21 numbers = 0 # ‘查看更多’翻页 22 img_set = set() # 存放图片url集 23 word_count = 0 # 文字说说计数器 24 words = "" # 存放文字说说 25 images = "" # 存放图片url 26 page = int(1761 / 40) 27 28 29 for i in range(0, page): 30 try: 31 html = requests.get(url_x + str(numbers) + url_y, headers=headers).content 32 data = json.loads(html) 33 # print(data) 34 35 for vFeed in data[\'data\'][\'vFeeds\']: 36 if \'pic\' in vFeed: 37 for pic in vFeed[\'pic\'][\'picdata\'][\'pic\']: 38 img_set.add(pic[\'photourl\'][\'0\'][\'url\']) 39 40 if \'summary\' in vFeed: 41 # print(str(word_count) + \'. \' + vFeed[\'summary\'][\'summary\']) 42 words += str(word_count) + \'. \' + vFeed[\'summary\'][\'summary\'] + \'\r\n\' 43 word_count += 1 44 except: 45 print(\'error\') 46 47 numbers += 40 48 time.sleep(10) 49 50 try: 51 with open(os.getcwd() + \'\\\' + str(qq) + \'.txt\', \'wb\') as fo: 52 fo.write(words.encode(\'utf-8\')) 53 print("文字说说写入完毕") 54 55 with open(os.getcwd() + \'\\\' + \'images_url\', \'wb\') as foImg: 56 for imgUrl in img_set: 57 images += imgUrl + \'\r\n\' 58 foImg.write(images.encode(\'utf-8\')) 59 print("图片写入完毕") 60 61 except: 62 print(\'写入数据出错\') 63 64 65 if not img_set: 66 print(u\'不存在图片说说\') 67 else: 68 image_path = os.getcwd() + \'\images\' 69 if os.path.exists(image_path) is False: 70 os.mkdir(image_path) 71 x = 1 72 for imgUrl in img_set: 73 temp = image_path + \'/%s.jpg\' % x 74 print(u\'正在下载地%s张图片\' % x) 75 try: 76 r = requests.get(imgUrl, stream=True) 77 if r.status_code == 200: 78 with open(temp, \'wb\') as f: 79 r.raw.decode_content = True 80 shutil.copyfileobj(r.raw, f) 81 except: 82 print(u\'该图片下载失败:%s\' % imgUrl) 83 x += 1