"""Fetch a Weibo user's timeline from the m.weibo.cn mobile API and print each post.

Every page of the timeline is requested as JSON, the post cards are reduced
to a small dict (id, plain text, like/comment/repost counts) and printed.
MongoDB persistence is wired up but left disabled.
"""
import requests
from urllib.parse import urlencode
from pyquery import PyQuery as pq
from pymongo import MongoClient

base_url = 'https://m.weibo.cn/api/container/getIndex?'
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/u/2803301701',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}
client = MongoClient()
db = client['weibo']
collection = db['weibo']
max_page = 10

# Seconds to wait for the server before giving up on a single page request.
REQUEST_TIMEOUT = 10


def get_page(page):
    """Request one page of the timeline.

    Args:
        page: 1-based page number sent as the ``page`` query parameter.

    Returns:
        A ``(payload, page)`` tuple, where ``payload`` is the decoded JSON
        body, when the server answers with HTTP 200. ``None`` when the
        status is not 200, the request fails, or the body is not valid JSON.
    """
    params = {
        'type': 'uid',
        'value': '2803301701',
        'containerid': '1076032803301701',
        'page': page,
    }
    url = base_url + urlencode(params)
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code == 200:
            return response.json(), page
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that cannot be decoded as JSON.
        print('Error', e.args)
    return None


def parse_page(json, page: int):
    """Yield one summary dict per post found in a timeline page.

    Args:
        json: Decoded JSON payload as returned by ``get_page``; a falsy
            value yields nothing.
        page: Page number the payload belongs to. Kept for backward
            compatibility with existing callers; cards are now filtered by
            content rather than by their position on a given page.

    Yields:
        Dicts with the keys ``id``, ``text``, ``attitudes``, ``comments``
        and ``reposts``.
    """
    if not json:
        return
    # Tolerate payloads where "data" or "cards" is missing or null.
    cards = (json.get('data') or {}).get('cards') or []
    for card in cards:
        mblog = card.get('mblog')
        if not mblog:
            # Some cards carry no post. Skipping them by content replaces the
            # old positional rule (second card of page 1), which crashed on
            # any other post-less card.
            continue
        raw_text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # The text field is an HTML fragment; reduce it to plain text.
            'text': pq(raw_text).text() if raw_text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }


# Persistence is intentionally disabled; uncomment here and in main() to enable.
# NOTE: Collection.insert() was removed in PyMongo 4, hence insert_one().
# def save_to_mongo(result):
#     if collection.insert_one(result):
#         print('Saved to Mongo')


def main():
    """Crawl pages 1..max_page and print every parsed post."""
    for page in range(1, max_page + 1):
        fetched = get_page(page)
        if fetched is None:
            # The page could not be retrieved; move on instead of crashing
            # while unpacking None.
            continue
        for result in parse_page(*fetched):
            print(result)
            # save_to_mongo(result)


if __name__ == '__main__':
    main()
运行结果: