Trojan00
from urllib.parse import urlencode

import requests
from pymongo import MongoClient
from pyquery import PyQuery as pq
 5 
 6 base_url = \'https://m.weibo.cn/api/container/getIndex?\'
 7 headers = {
 8     \'Host\': \'m.weibo.cn\',
 9     \'Referer\': \'https://m.weibo.cn/u/2803301701\',
10     \'User-Agent\': \'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36\',
11     \'X-Requested-With\': \'XMLHttpRequest\',
12 }
13 client = MongoClient()
14 db = client[\'weibo\']
15 collection = db[\'weibo\']
16 max_page = 10
17 
18 
def get_page(page, timeout=10):
    """Fetch one page of the container feed and decode it as JSON.

    Args:
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(data, page)`` tuple, where ``data`` is the decoded JSON body,
        when the server answers with HTTP 200. ``None`` when the status is
        anything else, the request fails, or the body is not valid JSON.
    """
    params = {
        'type': 'uid',
        'value': '2803301701',
        'containerid': '1076032803301701',
        'page': page,
    }
    url = base_url + urlencode(params)
    try:
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json(), page
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a 200 response whose body is not valid JSON.
        print('Error', e.args)
    return None
33 
34 
def parse_page(json, page: int):
    """Yield a flat summary dict for each post card in a feed response.

    Args:
        json: Decoded response body. Cards are read from
            ``json['data']['cards']``; a falsy value yields nothing.
        page: Page number the response was fetched for.

    Yields:
        Dicts with the keys ``id``, ``text``, ``attitudes``, ``comments``
        and ``reposts``, taken from each card's ``mblog`` entry. ``text``
        is the plain text extracted from the post's markup.
    """
    if not json:
        return
    # Tolerate a missing or null 'data' / 'cards' instead of raising.
    cards = (json.get('data') or {}).get('cards') or []
    for index, card in enumerate(cards):
        # Kept from the original: the second card of the first page is
        # always skipped. NOTE(review): presumably because that card is not
        # a post -- confirm; the 'mblog' check below covers that case anyway.
        if page == 1 and index == 1:
            continue
        mblog = card.get('mblog')
        if not mblog:
            # Cards without an 'mblog' entry carry no post data to extract.
            continue
        raw_text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # Only hand PyQuery real markup; missing/empty text becomes ''.
            'text': pq(raw_text).text() if raw_text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }
50 
51 
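# Disabled persistence helper, kept for reference.
# NOTE: Collection.insert() was removed in PyMongo 4; switch to
# insert_one() before re-enabling this.
# def save_to_mongo(result):
#     if collection.insert(result):
#         print('Saved to Mongo')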
55 
56 
# Script entry point: fetch pages 1..max_page and print every parsed post.
if __name__ == '__main__':
    for page in range(1, max_page + 1):
        fetched = get_page(page)
        # get_page returns None when a page could not be fetched; unpacking
        # None with * would raise TypeError, so skip that page instead.
        if fetched is None:
            continue
        for result in parse_page(*fetched):
            print(result)
            # save_to_mongo(result)

 

运行结果:

 

分类:

技术点:

相关文章: