aby321

 

 

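"""
Uses selenium together with PhantomJS, so no reverse engineering of the
site is needed.
Under Python 3 the default selenium install cannot be used; version 2.48.0
has to be specified explicitly.
"""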
import os

import requests
from selenium import webdriver


 11 url = \'https://music.163.com/#/playlist?id=884698131\'#网易云音乐歌单
 12 headers = {
 13     \'User-Agent\':\'User-Agent:Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36\',
 14 }
 15 driver = webdriver.PhantomJS()
 16 driver.get(url)
 17 driver.implicitly_wait(20)#隐式等待20s
 18 #pprint.pprint(driver.page_source)#查看源代码
 19 
 20 def is_chinese(uchar):
 21     """
 22     判断一个unicode是否是汉字
 23     :param uchar: <char>字符
 24     :return: <boolean>
 25     """
 26     if uchar >= u\'\u4e00\' and uchar <= u\'\u9fa5\':
 27         return True
 28     else:
 29         return False
 30 
 31 def is_number(uchar):
 32     """
 33     判断一个unicode是否是数字
 34     :param uchar: <char>字符
 35     :return: <boolean>
 36     """
 37     if uchar >= u\'\u0030\' and uchar <= u\'\u0039\':
 38         return True
 39     else:
 40         return False
 41 
 42 
 43 def is_alphabet(uchar):
 44     """
 45     判断一个unicode是否是英文字母
 46     :param uchar: <char>字符
 47     :return: <boolean>
 48     """
 49     if (uchar >= u\'\u0041\' and uchar <= u\'\u005a\') or (uchar >= u\'\u0061\' and uchar <= u\'\u007a\'):
 50         return True
 51     else:
 52         return False
 53 def is_filename(uchar):
 54     """
 55     排除文件名不可用的字符,这里请注意都是在英文输入法下的,若是中文输入法下的例如?“等是可以用来生成文件名的
 56     \/:*?"<>|
 57     :param uchar:
 58     :return:
 59     """
 60     if uchar == u\'\u005c\' or  uchar == u\'\u002f\' or uchar == u\'\u003a\' or uchar == u\'\u002a\' or uchar == u\'\u003f\' or uchar == u\'\u007c\' \
 61         or  uchar == u\'\u003c\' or uchar == u\'\u003e\' or uchar == u\'\u007c\':
 62         return False
 63     else:
 64         return True
 65 
 66 def format_str(content):
 67     """
 68     将字符串content中的字符逐个判断
 69     :param content: <str>输入文本
 70     :return: <str>输出只包含中文、数字及英文字母的字符串
 71     """
 72     content_str = \'\'
 73     for i in content:
 74         if is_chinese(i) or is_alphabet(i) or is_number(i):
 75             content_str = content_str+i
 76     print(\'处理后的content:\',content_str)
 77     return content_str
 78 
 79 def format_str2(content):
 80 
 81     content_str = \'\'
 82     for i in content:
 83         if is_filename(i):
 84             content_str = content_str+i
 85     print(\'处理后的content:\',content_str)
 86     return content_str
 87 
 88 
 89 driver.switch_to.frame(\'g_iframe\')#跳转到id为g_iframe的iframe框架下
 90 
 91 
 92 list_name = driver.find_element_by_class_name(\'f-ff2\').text   #歌单名称
 93 #大坑,源代码中是class=“f-ff2 f-brk”,属性值中间有空格,全写上报错
 94 # #list_name2 = driver.find_element_by_xpath(\'//div[@class=\"m-info\"]/div[2]\')
 95 print(list_name)
 96 
 97 #list_path = os.getcwd()+ \'/网易云音乐歌单-\' + format_str(list_name)
 98 list_path = os.getcwd()+ \'/网易云音乐歌单-\' + format_str2(list_name)
 99 
100 if os.path.exists(list_path):
101     pass
102 else:
103     os.mkdir(list_path)#若文件夹不存在,则新建
104 
105 ids = driver.find_elements_by_xpath(\'//a[starts-with(@href,"/song?id=")]\')
106 names = driver.find_elements_by_xpath(\'//a[starts-with(@href,"/song?id=")]/b\')
107 for i in range(0,len(ids)):
108     #id = ids[i].get_attribute(\'href\').strip(\'https://music.163.com/song?id=\')#有问题,获取的某些歌曲id不全,原因未知
109     id = ids[i].get_attribute(\'href\').replace(\'https://music.163.com/song?id=\',\'\')
110     name = format_str2(names[i].get_attribute(\'title\').strip().replace(\'/\',\'\'))
111 
112     print(id,name)
113     link = \'http://music.163.com/song/media/outer/url?id=\' + id + \'.mp3\'
114     #urllib.request.urlretrieve(link,\'E:/\'+id+\'.mp3\')#有时候下载下来的歌曲只有几十kb
115 
116     if os.path.exists(list_path + \'/\' + str(i + 1) + \'_\' + name + \'.mp3\') \
117             and (os.path.getsize(list_path + \'/\' + str(i + 1) + \'_\' + name + \'.mp3\')) >= 102400:
118         # 若歌曲存在且能正常播放(此处判断为大于100kb)则不再下载
119         pass
120 
121     else:
122         with open(list_path + \'/\' + str(i + 1) + \'_\' + name + \'.mp3\', \'wb\') as f:
123             f.write(requests.get(link, headers=headers).content)
124 
125     #有些歌曲能下载下来但是只有67kb左右,也无法播放,也就是网易云上显示但是无法在线听的音乐
126     if os.path.getsize(list_path + \'/\' + str(i + 1) + \'_\' + name + \'.mp3\') < 102400:
127         os.remove(list_path + \'/\' + str(i + 1) + \'_\' + name + \'.mp3\')
128     else:
129         pass
130 
131 driver.switch_to.default_content()#返回主文档

 

分类:

技术点:

相关文章: