爬取哔哩哔哩每日热榜

#哔哩哔哩每日热榜
#网页链接：https://www.bilibili.com/ranking?
import requests
from bs4 import BeautifulSoup
# Send the HTTP request and fetch the HTML of the ranking page.
url = 'https://www.bilibili.com/ranking?'
kv = {'user-agent': 'Mozilla/5.0'}  # disguise the crawler as a browser
r = requests.get(url, timeout=30, headers=kv)
# Stop on an HTTP error instead of parsing and saving an error page.
r.raise_for_status()
html = r.text  # page source
# Parse the page and extract the content.
soup = BeautifulSoup(html, 'lxml')  # build the soup object
res = soup.find_all('a', class_='title')  # every <a class="title"> element
# Number the titles in page order (starting at 1), one entry per line.
# get_text() is used instead of .string because .string is None when a tag
# has more than one child node, which would write "None" as the title.
text = ''.join(
    '{}{}\n'.format(num, link.get_text())
    for num, link in enumerate(res, start=1)
)
print(text)
# Save the collected text to a file.
with open('rank.text', 'w', encoding='utf8') as fout:
    fout.write(text)

2020.3.26

import requests
from bs4 import BeautifulSoup
import csv
import datetime
url = 'https://www.bilibili.com/ranking?'
# Send the network request. A browser-like User-Agent and a timeout are
# passed so the call matches the first script in this file and cannot hang
# forever on an unresponsive server.
response = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}, timeout=30)
# Fail early on an HTTP error rather than parsing an error page.
response.raise_for_status()
html_text = response.text
# Parsed document; main() reads this module-level object.
soup = BeautifulSoup(html_text, 'html.parser')
def main():
    """Extract the ranked videos from the parsed page and save them as CSV.

    Reads the module-level ``soup`` object, collects one record per
    ``<li class="rank-item">`` element, and writes all records (preceded by
    a header row) to ``top100.csv_<YYYYMMDD>.csv`` in the current working
    directory, where the date is today's local date.

    A field whose element is missing from an item is written as an empty
    cell instead of aborting the whole export.
    """

    class Video:
        """Holds the information of one video in the ranking."""

        def __init__(self, rank, title, point, visit, up, url):
            self.rank = rank
            self.title = title
            self.point = point
            self.visit = visit
            self.up = up
            self.url = url

        def to_csv(self):
            """Return the fields as a CSV row, in csv_title() order."""
            return [self.rank, self.title, self.point, self.visit, self.up, self.url]

        @staticmethod
        def csv_title():
            """Return the CSV header row."""
            return ['排名', '标题', '分数', '播放量', 'UP', 'URL']

    def text_of(tag):
        """Return the text of *tag*, or '' when the element was not found."""
        return tag.text if tag is not None else ''

    # Extract the list of ranking entries.
    videos = []  # the extracted Video records
    for itm in soup.find_all('li', {'class': 'rank-item'}):
        title_link = itm.find('a', {'class': 'title'})  # title and link
        links = itm.find_all('a')
        videos.append(Video(
            rank=text_of(itm.find('div', {'class': 'num'})),  # rank
            title=text_of(title_link),  # title
            point=text_of(itm.find('div', {'class': 'pts'})),  # overall score
            visit=text_of(itm.find('span', {'class': 'data-box'})),  # play count
            # The third <a> inside the item is taken as the uploader link;
            # this relies on the page layout - TODO confirm it still holds.
            up=links[2].text if len(links) > 2 else '',
            url=title_link.get('href') if title_link is not None else '',  # video link
        ))

    # Save to a file named after today's date.
    now_str = datetime.datetime.now().strftime('%Y%m%d')
    file_name = f'top100.csv_{now_str}.csv'
    # Explicit UTF-8 so the Chinese header and titles do not depend on the
    # platform's default encoding; newline='' is what the csv module expects.
    with open(file_name, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(Video.csv_title())
        writer.writerows(v.to_csv() for v in videos)


# Run the export only when this file is executed as a script.
if __name__ == '__main__':
    main()

1.打开网页

2.获取源代码

3.解析网页，提取需要的内容，先找第一名的

这里找到需要提取的标题a标签，分析特点，它的类是title，在代码中可以用find_all函数查找

发现成功将排行榜爬取下来，想到可以用for循环把结果一个个打印出来

因为内容都是按顺序排下来的，所以可以自己弄数字形成排名

然后把内容保存到一个变量里去并检查有没有正常保存

最后直接保存到文件里：创建一个文本文件（代码中的文件名是 rank.text），以写入的方式打开，把打开的文件对象赋值给 fout 这个变量，再用 fout 写入获取到的文本内容。

获取数据截图