syc0403
#哔哩哔哩每日热榜
#网页链接:https://www.bilibili.com/ranking?
import requests
from bs4 import BeautifulSoup
#发出request请求,获取html网页
url = \'https://www.bilibili.com/ranking?\'
kv = {\'user-agent\': \'Mozilla/5.0\'}#伪装爬虫
r = requests.get(url,timeout = 30,headers=kv)
r.text#获取源代码
html=r.text
#解析网页,提取内容
soup=BeautifulSoup(html,\'lxml\')#构造Soup的对象
res = soup.find_all(\'a\',class_=\'title\')
num = 0
text = \'\'
for i in res:
    num+=1
    text+=\'{}{}\n\'.format(num,i.string)#先把内容保存到变量里去
print(text)
#保存
with open(\'rank.text\',\'w\',encoding=\'utf8\')as fout:
    fout.write(text)

 

2020.3.26

import requests
from bs4 import BeautifulSoup
import csv
import datetime
url = \'https://www.bilibili.com/ranking?\'
#发起网络请求
response = requests.get(url)
html_text = response.text
soup = BeautifulSoup(html_text,\'html.parser\')
def main():
#用来保存视频信息的对象
class Video:
def __init__(self,rank,title,point,visit,up,url):
self.rank = rank
self.title = title
self.point = point
self.visit = visit
self.up = up
self.url = url
def to_csv(self):
return [self.rank,self.title,self.point,self.visit,self.up,self.url]

@staticmethod
def csv_title():
return [\'排名\',\'标题\',\'分数\',\'播放量\',\'UP\',\'URL\']
#提取列表
items = soup.find_all(\'li\',{\'class\':\'rank-item\'})
videos = [] #保存提取出来的video
for itm in items:
title = itm.find(\'a\',{\'class\':\'title\'}).text#标题
point = itm.find(\'div\',{\'class\':\'pts\'}).text#综合得分
rank = itm.find(\'div\',{\'class\':\'num\'}).text#排名
visit = itm.find(\'span\',{\'class\':\'data-box\'}).text#播放量
up = itm.find_all(\'a\',)[2].text#up
url = itm.find(\'a\',{\'class\':\'title\'}).get(\'href\')#获取链接
v = Video(rank,title,point,visit,up,url)
videos.append(v)

#保存
now_str = datetime.datetime.now().strftime(\'%Y%m%d\')
file_name = f\'top100.csv_{now_str}.csv\'
with open(file_name,\'w\',newline=\'\') as f:
writer = csv.writer(f)
writer.writerow(Video.csv_title())
for v in videos:
writer.writerow(v.to_csv())

 

1.打开网页

2.获取源代码

3.解析网页,提取需要的内容,先找第一名的

 

 

 

这里找到需要提取的标题a标签,分析特点,它的类是title,在代码中可以用find_all函数查找

 

发现成功将排行榜爬取下来,想到可以用for循环把结果一个个打印出来

 

 

因为内容都是按顺序排下来的,所以可以自己弄数字形成排名

 

 

然后把内容保存到一个变量里去并检查有没有正常保存

 

 

 

 

最后直接保存到文件里面去,创建一个rank.txt,以写入的方式打开,把它赋值到fout这个变量里,fout写入获取到的文本内容

 

 

获取数据截图

 

 

分类:

技术点:

相关文章: