QQ空间网页版:https://user.qzone.qq.com/
登录后,进入“设置”中的“权限设置”,将“谁能看我的空间”设置为“好友可见”,然后再构造爬虫。
(1)获取Cookie
两种方式:
第一种:在 Chrome 中按 F12 打开开发者工具,从请求头中手动查找 Cookie
第二种:使用 selenium 模拟登录后获取
"""Log in to Qzone through a Selenium-driven Firefox and dump the cookies.

The resulting ``cookie_dict.txt`` is a JSON object mapping cookie name to
cookie value.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import monotonic, sleep
import json

# Fill in the QQ number and password before running.
QQ_num = ''
QQ_s = ''

# Upper bound (seconds) on how long to wait for the login to complete.
LOGIN_TIMEOUT = 30

driver = webdriver.Firefox()
try:
    driver.get('https://user.qzone.qq.com/' + QQ_num + '/main')
    driver.switch_to.frame('login_frame')
    # Switch the login widget to account/password mode, then submit.
    # find_element(By.ID, ...) is used because the find_element_by_* helpers
    # were removed in Selenium 4.3; this form works on old and new releases.
    driver.find_element(By.ID, 'switcher_plogin').click()
    driver.find_element(By.ID, 'u').send_keys(QQ_num)
    driver.find_element(By.ID, 'p').send_keys(QQ_s)
    driver.find_element(By.ID, 'login_button').click()

    # Leave the login iframe so the cookies are read from the top-level page.
    # NOTE(review): assumes the session cookies live on the top-level
    # document rather than on the login iframe -- confirm.
    driver.switch_to.default_content()

    # A fixed one-second sleep can capture the cookies before the login has
    # finished. Poll instead until 'p_skey' shows up (the cookie the
    # friend-list script needs to compute g_tk) or the timeout expires; on
    # timeout whatever cookies exist are still saved, as before.
    deadline = monotonic() + LOGIN_TIMEOUT
    while True:
        cookies = driver.get_cookies()
        logged_in = any(c.get('name') == 'p_skey' for c in cookies)
        if logged_in or monotonic() >= deadline:
            break
        sleep(1)
finally:
    # Always close the browser, even if a locator or the page load fails.
    driver.quit()

# Persist the cookies locally as a flat {name: value} mapping.
cookie_dic = {
    c['name']: c['value']
    for c in cookies
    if 'name' in c and 'value' in c
}
with open('cookie_dict.txt', 'w') as f:
    json.dump(cookie_dic, f)
(2)构造链接
这一步参考了知乎上其他人的思路,关键在于 g_tk 参数的构造(由 Cookie 中的 p_skey 计算得到)。
(3)获取所有好友信息(主要是昵称和QQ号)
import csv
import json
import re
import urllib.parse


class Qzone:
    """Small Qzone client that fetches the logged-in user's friend list.

    The cookie dict and request headers can be passed to the constructor.
    When they are omitted, the methods fall back to the module-level
    ``cookie`` / ``header`` globals, which is how the original script
    supplied them.
    """

    # Captures everything between the first "(" and the last ")" of the
    # response body, i.e. the JSON payload inside the callback wrapper.
    _JSONP_PAYLOAD = re.compile(r'\((.*)\)', re.S)

    def __init__(self, cookie=None, header=None):
        """Store optional cookie and header dicts for later requests.

        Args:
            cookie: Mapping of cookie name to value; must contain
                ``p_skey`` and ``ptui_loginuin`` for the methods below.
            header: HTTP headers sent with the friend-list request.
        """
        self._cookie = cookie
        self._header = header

    def _get_cookie(self):
        """Return the injected cookie dict, else the module-level ``cookie``."""
        if self._cookie is not None:
            return self._cookie
        return cookie  # noqa: F821 - module-level fallback set by the script

    def _get_header(self):
        """Return the injected headers, else the module-level ``header``."""
        if self._header is not None:
            return self._header
        return header  # noqa: F821 - module-level fallback set by the script

    def get_gtk(self):
        """Compute the ``g_tk`` request token from the ``p_skey`` cookie.

        Returns:
            The hash of ``p_skey`` (seed 5381, ``h += (h << 5) + ord(c)``
            per character) masked to 31 bits.
        """
        h = 5381
        for ch in self._get_cookie()['p_skey']:
            h += (h << 5) + ord(ch)
        return h & 0x7FFFFFFF

    def get_uin(self):
        """Return the value of the ``ptui_loginuin`` cookie."""
        return self._get_cookie()['ptui_loginuin']

    def get_qq(self):
        """Write all friends to ``friends.csv`` and return their QQ numbers.

        Each CSV row is ``[name, uin]`` as returned by :meth:`get_friend`.

        Returns:
            List with the ``uin`` of every friend, in response order.
        """
        friend_list = self.get_friend()
        # The context manager closes the file even if a row fails to write.
        with open('friends.csv', 'w', newline='') as csvfile:
            csv.writer(csvfile, dialect='excel').writerows(friend_list)
        return [friend[1] for friend in friend_list]

    def get_friend(self):
        """Fetch the friend list from the friend_ship_manager endpoint.

        Returns:
            List of ``[name, uin]`` pairs, one per entry of
            ``data.items_list`` in the response.

        Raises:
            IndexError: If the response contains no parenthesised payload.
            KeyError: If the decoded payload lacks ``data``/``items_list``.
        """
        # Imported here so the pure helpers above stay usable without the
        # HTTP client installed; only this method touches the network.
        import requests

        query = urllib.parse.urlencode({
            'uin': self.get_uin(),
            'do': 1,
            'g_tk': self.get_gtk(),
        })
        url_friend = (
            'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/'
            'cgi-bin/tfriend/friend_ship_manager.cgi?' + query
        )
        # A timeout keeps the script from hanging forever on a stalled server.
        res = requests.get(
            url_friend,
            headers=self._get_header(),
            cookies=self._get_cookie(),
            timeout=10,
        )

        match = self._JSONP_PAYLOAD.search(res.text)
        if match is None:
            # Same exception type the original ``findall(...)[0]`` raised,
            # now with a message that shows what came back instead.
            raise IndexError(
                'no parenthesised payload in friend list response: %r'
                % res.text[:200]
            )
        friend_dict = json.loads(match.group(1))

        # Collect each friend's name and QQ number as a [name, uin] pair.
        return [
            [friend['name'], friend['uin']]
            for friend in friend_dict['data']['items_list']
        ]


if __name__ == '__main__':
    # Kept as a module-level global so it is convenient to reach later.
    relationships = []
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0",
        # "Accepted-Language" is not an HTTP header; the intended name is
        # "Accept-Language".
        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }
    with open('cookie_dict.txt', 'r') as f:
        cookie = json.load(f)

    qzone = Qzone(cookie=cookie, header=header)
    # Fetch the friend list, export it to CSV and keep the QQ numbers.
    qq_list = qzone.get_qq()