import requests
from bs4 import BeautifulSoup
import time
# Request headers sent with every page download; the User-Agent is a desktop
# Chrome browser string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    ),
}
def get_infos(url):
    """Download one ranking page and print a record for every song on it.

    Args:
        url: Address of the ranking page to fetch.

    Returns:
        None. One dict per song is printed, with the keys 'rank', 'song',
        'singer' and 'time'.

    Raises:
        requests.RequestException: If the request fails or exceeds the
            timeout.
    """
    # headers must be passed by keyword: the second positional parameter of
    # requests.get() is `params`, so a positional dict would be encoded into
    # the query string instead of being sent as request headers.
    # The timeout keeps a stalled connection from blocking the script forever.
    resp = requests.get(url, headers=headers, timeout=10)
    bs = BeautifulSoup(resp.text, 'lxml')

    # The rank cells could equally be located with
    # bs.find_all('span', {'class': 'pc_temp_num'}).
    ranks = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_num')
    titles = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > a')
    times = bs.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_tips_r > span')

    # `duration` rather than `time`, so the imported time module is not
    # shadowed inside this function.
    for rank, title, duration in zip(ranks, titles, times):
        # The link text is treated as "<singer>-<song>". Splitting only at
        # the first hyphen keeps hyphens inside the song name intact.
        singer, sep, song = title.text.partition('-')
        if not sep:
            # No hyphen at all: treat the whole text as the song name
            # instead of failing with an IndexError.
            singer, song = '', singer
        data = {
            'rank': rank.text.strip(),
            'song': song.strip(),
            'singer': singer.strip(),
            'time': duration.text.strip(),
        }
        print(data)  # emit the record
if __name__ == '__main__':  # script entry point
    # Build the URLs of ranking pages 1 through 23.
    urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(i)
            for i in range(1, 24)]
    for url in urls:
        get_infos(url)
        time.sleep(2)  # wait two seconds between page requests