tian1137

爬取酷狗top500歌曲信息

import requests
from bs4 import BeautifulSoup
import time

# Request headers: a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    ),
}

def get_infos(url):
    """Fetch one ranking page and print the details of every song on it.

    Downloads ``url``, picks out the rank, title and duration elements of the
    song list with CSS selectors, and prints one dict per song with the keys
    ``rank``, ``song``, ``singer`` and ``time``.

    Args:
        url: Address of the ranking page to fetch.

    Returns:
        None. Each song's dict is written to stdout.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # Pass headers by keyword: the second positional parameter of
    # requests.get is ``params`` (the query string), not the HTTP headers.
    # The timeout keeps a stalled connection from blocking forever.
    resp = requests.get(url, headers=headers, timeout=10)
    bs = BeautifulSoup(resp.text, 'lxml')
    # Alternative noted by the original author for the rank elements:
    #   bs.find_all('span', {'class': 'pc_temp_num'})
    ranks = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_num')
    titles = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > a')
    times = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_tips_r > span')
    # The loop variable is called ``duration`` so that it does not shadow the
    # imported ``time`` module.
    for rank, title, duration in zip(ranks, titles, times):
        # The link text is assumed to read "singer - song" (verify against the
        # live page). Partitioning on the first hyphen keeps hyphens that
        # belong to the song name and yields an empty song instead of an
        # IndexError when there is no hyphen at all.
        singer, _, song = title.text.partition('-')
        data = {'rank': rank.text.strip(),
                'song': song.strip(),
                'singer': singer.strip(),
                'time': duration.text.strip()}
        print(data)  # emit the record

if __name__ == '__main__':  # script entry point
    # One URL per ranking page, for page numbers 1 through 23.
    urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(i) for i in range(1, 24)]
    for url in urls:
        get_infos(url)
        # Pause two seconds after every page; the delay has to sit inside the
        # loop to space the requests out rather than run once at the very end.
        time.sleep(2)

 

发表于 2018-03-20 16:21  刘书田  阅读(795)  评论(0)  编辑  收藏  举报
 

分类:

技术点:

相关文章:

  • 2021-08-27
  • 2021-07-02
  • 2021-09-02
  • 2021-09-06
  • 2021-12-07
  • 2017-12-22
  • 2021-12-22
  • 2021-07-29
猜你喜欢
  • 2021-11-12
  • 2021-11-23
  • 2021-11-23
  • 2019-05-27
  • 2022-01-01
  • 2021-11-27
相关资源
相似解决方案