YYQ-4414

分析:

代码部分:

import requests,time
from pandas import DataFrame
from my_fake_useragent import UserAgent

def get_headers():
    ua = UserAgent().random()
    headers = {
        \'Connection\':\'close\',
        \'Referer\':\'猫眼热度\',
        \'User-Agent\':\'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47\',
    }#这是想使用动态User-Agent的,但是在持久化储存的时候,使用动态的User-Agent会报错,所以就不用了
    return headers

def get_requests():
        
    params = {
        
        \'seriesType\':\'\',
        \'platformType\':\'\',
        \'showDate\':\'2\',
        \'dateType\':\'0\',
        \'rankType\':\'0\',
    }
    session = requests.Session()#处理cookie
    session.get(url=\'猫眼热度\',headers=get_headers())
    my_url = \'猫眼热度ajax请求\'
    response = session.get(url=my_url,headers=get_headers(),params=params)
    response.encoding = \'utf-8\'
    data_list = response.json()[\'dataList\']
    
    dateTime = data_list[\'updateInfo\'][\'updateTimestamp\']
    dateTime = time.strftime("%Y-%m-%d %H:%M",time.localtime(dateTime/1000))
    print(\'抓取数据数据\',dateTime,\'\n\')
    
    
    name_list = []
    platfor_list = []
    releaseInfo_list = []
    heat_list = []
    play_list = []#把抓取到的数据放在列表里面
    
    for dic in data_list[\'list\']:
        name = dic[\'seriesInfo\'][\'name\']
        platfor = dic[\'seriesInfo\'][\'platformDesc\']
        releaseInfo = dic[\'seriesInfo\'][\'releaseInfo\']
        heat = dic[\'currHeatDesc\']
        try:
            play = dic[\'playCountSplitUnit\'][\'num\']#这部分是数据的解析提取
        except:
            play = None
            
        
        print(name,platfor,releaseInfo,\'{}万热度 \'.format(heat),\'{}万播放量\'.format(play),\'\n\')
        name_list.append(name)
        platfor_list.append(platfor)
        releaseInfo_list.append(releaseInfo)
        heat_list.append(heat)
        play_list.append(play)
        
    return dateTime,name_list,platfor_list,releaseInfo_list,heat_list,play_list
    
def ptable_to_csv():#数据的持久化储存
    
    dic = {
        
        \'时间\':get_requests()[0],
        \'剧名\':get_requests()[1],
        \'平台\':get_requests()[2],
        \'上线时间\':get_requests()[3],
        \'热度(万)\':get_requests()[4],
        \'平台播放量(万)\':get_requests()[5],
    }
    data = DataFrame(data=dic)
    data.to_csv(\'video_data.csv\',encoding=\'utf-8-sig\',mode=\'a\',header=None)
    
def main():
    
    while True:
        ptable_to_csv()
        time.sleep(30)#循环,实验效果,这里设置是30秒抓一次(可以设置30分钟等)
        
main()



ok,下面是抓取效果:

分类:

技术点:

相关文章:

  • 2021-08-14
  • 2021-10-25
  • 2021-07-16
  • 2022-12-23
  • 2021-07-16
  • 2021-08-17
  • 2021-09-04
  • 2021-09-05
猜你喜欢
  • 2022-12-23
  • 2021-09-11
  • 2021-11-30
  • 2022-12-23
  • 2022-02-08
相关资源
相似解决方案