实例操作:非常规页面爬取

import requests
import lxml.html
import json

class Bili:
    """Download a JSON comment page and print each comment with its replies."""

    def __init__(self):
        pass

    def getMsg(self, url, timeout=10):
        """Request ``url``, decode the body as UTF-8 JSON and print its comments.

        Every entry of ``data.replies`` is printed on one line (user name,
        sex, message), followed by one line for each entry of that comment's
        own ``replies`` list.

        Args:
            url: Address of the JSON endpoint to request.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the caller forever.

        Raises:
            requests.RequestException: If the request fails or times out.
            ValueError: If the body is not valid UTF-8 or not valid JSON.
            KeyError, TypeError: If the decoded payload does not have the
                ``data`` / ``member`` / ``content`` structure read below.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6788.400 QQBrowser/10.3.2727.400'}
        response = requests.get(url=url, headers=headers, timeout=timeout)
        payload = json.loads(response.content.decode('utf-8'))
        self._show_replies(payload)

    def _show_replies(self, payload):
        """Print every comment in a decoded payload, each followed by its replies."""
        # A null ``replies`` value is treated like an empty list at both
        # levels, so iteration never fails on None.
        for comment in payload['data']['replies'] or ():
            self._show_entry(comment)
            for child in comment['replies'] or ():
                self._show_entry(child)

    @staticmethod
    def _show_entry(entry):
        """Print one comment as: user name, sex, a three-space gap, message."""
        member = entry['member']
        print(member['uname'], member['sex'], '   ', entry['content']['message'])
if __name__ == '__main__':
    # Script entry point: print the comments of one item, page by page.
    import sys

    bl = Bili()
    oid = 46820799  # substituted into the ``oid`` query parameter below
    # NOTE(review): pages are requested as pn=0..19, unchanged from the
    # original; confirm whether the endpoint counts pages from 1, in which
    # case pn=0 may simply repeat the first page.
    for page in range(20):
        url1 = 'https://api.bilibili.com/x/v2/reply?&jsonp=jsonp&pn={}&type=1&oid={}&sort=0'.format(page, oid)
        try:
            bl.getMsg(url1)
        except Exception as exc:
            # Best effort: carry on with the remaining pages, but report
            # which page failed and why instead of hiding the error.
            print('page {} failed: {!r}'.format(page, exc), file=sys.stderr)

Python 爬取 Bilibili(B站)评论

相关文章: