neilshi

经测试上一节的代码成功跑通,接下来加上循环爬取所有说说-。-

 

 

 

完整代码:

 1 import requests
 2 import json
 3 import os
 4 import shutil
 5 import time
 6 
 7 qq = 627911861
 8 
 9 headers = {
10     \'accept\': \'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\',
11     \'accept-encoding\': \'gzip, deflate, br\',
12     \'accept-language\': \'zh-CN,zh;q=0.8\',
13     \'cache-control\': \'max-age=0\',
14     \'cookie\': \'xxxxxx\',
15     \'upgrade-insecure-requests\': \'1\',
16     \'user-agent\': \'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Mobile Safari/537.36\'
17 }
18 
19 url_x = \'https://mobile.qzone.qq.com/list?qzonetoken=9d29961d6fbb88be6236636010e0d4fde43a5b77d57ef984938b5aa0cb695e28c258a4d86b8c02a545bbcce970ff&g_tk=1573033187&res_attach=att%3D\'
20 url_y = \'%26tl%3D1508257557&format=json&list_type=shuoshuo&action=0&res_uin=627911861&count=40\'
21 numbers = 0      # ‘查看更多’翻页
22 img_set = set()  # 存放图片url集
23 word_count = 0   # 文字说说计数器
24 words = ""       # 存放文字说说
25 images = ""      # 存放图片url
26 page = int(1761 / 40)
27 
28 
29 for i in range(0, page):
30     try:
31         html = requests.get(url_x + str(numbers) + url_y, headers=headers).content
32         data = json.loads(html)
33         # print(data)
34 
35         for vFeed in data[\'data\'][\'vFeeds\']:
36             if \'pic\' in vFeed:
37                 for pic in vFeed[\'pic\'][\'picdata\'][\'pic\']:
38                     img_set.add(pic[\'photourl\'][\'0\'][\'url\'])
39 
40             if \'summary\' in vFeed:
41                 # print(str(word_count) + \'. \' + vFeed[\'summary\'][\'summary\'])
42                 words += str(word_count) + \'. \' + vFeed[\'summary\'][\'summary\'] + \'\r\n\'
43                 word_count += 1
44     except:
45         print(\'error\')
46 
47     numbers += 40
48     time.sleep(10)
49 
50 try:
51     with open(os.getcwd() + \'\\\' + str(qq) + \'.txt\', \'wb\') as fo:
52         fo.write(words.encode(\'utf-8\'))
53         print("文字说说写入完毕")
54 
55     with open(os.getcwd() + \'\\\' + \'images_url\', \'wb\') as foImg:
56         for imgUrl in img_set:
57             images += imgUrl + \'\r\n\'
58         foImg.write(images.encode(\'utf-8\'))
59         print("图片写入完毕")
60 
61 except:
62     print(\'写入数据出错\')
63 
64 
65 if not img_set:
66     print(u\'不存在图片说说\')
67 else:
68     image_path = os.getcwd() + \'\images\'
69     if os.path.exists(image_path) is False:
70         os.mkdir(image_path)
71     x = 1
72     for imgUrl in img_set:
73         temp = image_path + \'/%s.jpg\' % x
74         print(u\'正在下载地%s张图片\' % x)
75         try:
76             r = requests.get(imgUrl, stream=True)
77             if r.status_code == 200:
78                 with open(temp, \'wb\') as f:
79                     r.raw.decode_content = True
80                     shutil.copyfileobj(r.raw, f)
81         except:
82             print(u\'该图片下载失败:%s\' % imgUrl)
83         x += 1

 

分类:

技术点:

相关文章: