"""爬虫 - 网易云歌单下载 (NetEase Cloud Music playlist crawler/downloader)

1. https://music.163.com/discover/playlist — the playlist index page; scrape all
   playlists as datas = [('playlist_name_1', 'playlist_id_1'), ...]
2. 'https://music.163.com/playlist?id=%s' % data[1] — open each playlist page
3. Regex-match the song URLs inside each playlist
4. Download each track via requests.get().content
5. Parallelize with multiprocessing.Pool
"""
import json
import os
import re
from multiprocessing import Pool

import requests
from fake_useragent import UserAgent
class WangyiSpider(object):
    """Crawl NetEase Cloud Music: download songs from the first two hot playlists."""

    def __init__(self):
        """Set up request headers; the Referer is required by music.163.com."""
        self.headers = {
            'Referer': 'https://music.163.com/discover/playlist',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/72.0.3626.119 Safari/537.36'
        }

    def get_song(self):
        """Fetch the playlist index page and download the first two playlists in parallel."""
        # Enter the playlist index page.
        songlist_url = 'https://music.163.com/discover/playlist'
        # NOTE(review): verify=False disables TLS certificate checking; kept for
        # behavior compatibility, but it should be removed unless truly needed.
        songlist_res = requests.get(songlist_url, verify=False, headers=self.headers)
        # Each playlist entry looks like: <a title="..." href="/playlist?id=..." class="msk">
        # Raw string so \? and \d are regex escapes, not (invalid) string escapes.
        a_data = re.findall(
            r'<a title="(.*?)" href="/playlist\?id=(\d+)" class="msk"></a>',
            songlist_res.text)
        # Ensure the output directory exists before the workers start writing.
        os.makedirs('music', exist_ok=True)
        # Process pool for download throughput; close/join so workers don't leak.
        pool = Pool(processes=4)
        try:
            # Feed each (title, playlist_id) tuple to get_song_content.
            pool.map(self.get_song_content, a_data[:2])
        finally:
            pool.close()
            pool.join()

    def get_song_content(self, data):
        """Download up to three songs of one playlist.

        data: a (title, playlist_id) tuple, e.g. ('世界它太小,小到容不下爱人的心', '2885665791')
        """
        url = 'https://music.163.com/playlist?id=%s' % data[1]
        res = requests.get(url, headers=self.headers, verify=False)
        # Each song entry looks like: <li><a href="/song?id=...">title</a></li>
        for song_id, title in re.findall(r'<li><a href="/song\?id=(\d+)">(.*?)</a></li>', res.text)[:3]:
            song_url = 'https://music.163.com/song/media/outer/url?id=%s' % song_id
            song_content = requests.get(song_url, headers=self.headers, verify=False)
            try:
                with open('music/%s.mp3' % title, 'wb') as f:
                    f.write(song_content.content)
                print(title + ' 下载完成!')
            except Exception as e:
                # Best-effort: a title containing characters illegal in file
                # names (e.g. '/') fails here; report it and keep going.
                print(e)
if __name__ == '__main__':
    # song_name = input('enter song name you want: ').strip()
    spider = WangyiSpider()
    spider.get_song()