一、爬取b站用户信息
本次爬取b站UID为1-10的用户的个人信息，包括昵称、性别、头像、粉丝数、播放数等。
import json
import os

import requests


# Maps the numeric ``vip.type`` field of the profile payload to a display label.
vip_type = {0: '普通用户', 1: '小会员', 2: '大会员'}

# Headers sent with every request (profile, stats and avatar download alike).
headers = {
    'Referer': 'https://space.bilibili.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` may block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Directory (relative to the working directory) where avatars are stored.
FACE_DIR = 'b站用户头像'

# Characters that are not allowed in file names on common file systems;
# each one is replaced with "_" before the nickname is used in a path.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _load_json_or_jsonp(text):
    """Parse a response body that is either plain JSON or JSONP.

    A JSONP body has the form ``callback({...})``. Rather than slicing off a
    fixed number of characters (which silently depends on the callback name
    being exactly five characters long), everything between the first ``(``
    and the last ``)`` is taken as the JSON payload.

    Raises:
        ValueError: if the body is neither JSON nor JSONP
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    text = text.strip()
    if not text.startswith(('{', '[')):
        start = text.find('(')
        end = text.rfind(')')
        if start == -1 or end <= start:
            raise ValueError('response is neither JSON nor JSONP')
        text = text[start + 1:end]
    return json.loads(text)


def _fetch_data(url):
    """GET *url* with the shared headers and return the payload's ``data`` field.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
        ValueError: if the body cannot be parsed.
        KeyError: if the parsed payload has no ``data`` key.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return _load_json_or_jsonp(response.text)['data']


def download_face(url, mid, name):
    """Download a user's avatar and save it as ``<mid>_<name>.jpg``.

    Args:
        url: Address of the avatar image.
        mid: User id; used as the first part of the file name.
        name: User nickname; used as the second part of the file name after
            characters that are invalid in file names are replaced with "_".

    Raises:
        requests.RequestException: if the image cannot be fetched.
        OSError: if the directory or file cannot be written.
    """
    print('正在下载用户头像...')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(FACE_DIR, exist_ok=True)
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of saving an HTML error page as a .jpg file.
    response.raise_for_status()
    safe_name = str(name).translate(_UNSAFE_FILENAME_CHARS)
    path = os.path.join(FACE_DIR, '{}_{}.jpg'.format(mid, safe_name))
    with open(path, 'wb') as f:
        f.write(response.content)  # raw image bytes


def get_user_ele_info(url):
    """Fetch a user's basic profile, download the avatar and print the fields.

    Args:
        url: Profile endpoint URL for one user.
    """
    data = _fetch_data(url)
    mid = data['mid']
    name = data['name']
    sex = data['sex']
    level = data['level']
    sign = data['sign']
    official = data['official']['title']
    vip_num = data['vip']['type']
    face_url = data['face']
    download_face(face_url, mid, name)
    print('用户UID:{}'.format(mid))
    print('用户姓名:{}'.format(name))
    print('用户性别:{}'.format(sex))
    print('用户等级:{}'.format(level))
    # .get() prints None for a vip type that is not in the table.
    print('用户类型:{}'.format(vip_type.get(vip_num)))
    print('用户个性签名:{}'.format(sign))
    print('用户官方认证:{}'.format(official))


def get_user_follow(url):
    """Fetch and print a user's following and follower counts.

    Args:
        url: Relation-stat endpoint URL; the response is expected to be JSONP
            (the URL built in ``main`` requests the ``__jp3`` callback), but a
            plain JSON body is accepted as well.
    """
    data = _fetch_data(url)
    following = data['following']
    follower = data['follower']
    print('用户关注数:{}'.format(following))
    print('用户粉丝数:{}'.format(follower))


def get_user_achieve(url):
    """Fetch and print a user's like count and total video views.

    Args:
        url: Up-stat endpoint URL; the response is expected to be JSONP
            (the URL built in ``main`` requests the ``__jp4`` callback), but a
            plain JSON body is accepted as well.
    """
    data = _fetch_data(url)
    video_views = data['archive']['view']
    likes = data['likes']
    print('用户获赞数:{}'.format(likes))
    print('用户视频播放数:{}'.format(video_views))


def main():
    """Print profile, relation and achievement info for user ids 1 to 10."""
    for i in range(1, 11):
        url_ele_info = 'https://api.bilibili.com/x/space/acc/info?mid={}&jsonp=json'.format(i)
        url_follow = 'https://api.bilibili.com/x/relation/stat?vmid={}&jsonp=jsonp&callback=__jp3'.format(i)
        url_achieve = 'https://api.bilibili.com/x/space/upstat?mid={}&jsonp=jsonp&callback=__jp4'.format(i)
        get_user_ele_info(url_ele_info)
        get_user_follow(url_follow)
        get_user_achieve(url_achieve)
        print('-' * 100)


if __name__ == '__main__':
    main()
运行结果:
用户头像:
二、
待续……