"""Collect tag strings from music.163.com hot playlists and count their words.

The script runs three steps at module level:

1. Fetch the hot-playlist listing page, then each listed playlist page, and
   append the first tag string found on every playlist page to
   ``result3.txt``.
2. Read ``result3.txt`` back in full and segment it with ``jieba``.
3. Append every word of length >= 2 with its frequency to ``result4.txt``
   (and print it), most frequent first.

Both output files are opened in append mode, so repeated runs accumulate:
step 2 counts everything ever written to ``result3.txt``, not only the lines
added by the current run.
"""
import re
import urllib.error
import urllib.parse
import urllib.request
from collections import Counter

import jieba

# Browser-like User-Agent sent with every request.
_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
}

# Compiled once instead of on every call.
_PLAYLIST_ID_RE = re.compile(r'playlist\?id=(\d*?)" class="t')
_PLAYLIST_NAME_RE = re.compile(r'<a title="(.*?)" href="/playlist\?id=\d*?" class="t')
# re.S lets the captured tag text span line breaks.
_LABELS_RE = re.compile('标签:(.*?),简介', re.S)

# Substrings removed from each token, applied in this order.
_PUNCTUATION = (",", "!", "“", "”", "。", "?", ":", "...", "、")


def _fetch_html(url):
    """Return the body of *url* decoded as UTF-8, closing the response afterwards."""
    request = urllib.request.Request(url=url, headers=_HEADERS)
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')


def _clean_token(token):
    """Strip the punctuation in ``_PUNCTUATION`` plus surrounding spaces/newlines."""
    for mark in _PUNCTUATION:
        token = token.replace(mark, "")
    return token.strip(' ').strip('\r\n')


def get_all_hotSong(url):
    """Extract playlist names and ids from a playlist listing page.

    Args:
        url: Address of the listing page to download.

    Returns:
        A ``(names, ids)`` tuple of two lists of strings, each produced by
        its own regular expression over the page HTML. The two lists are
        matched independently, so they are not guaranteed to be the same
        length.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    html = _fetch_html(url)
    result_id = _PLAYLIST_ID_RE.findall(html)
    result_name = _PLAYLIST_NAME_RE.findall(html)
    return result_name, result_id


def get_Lables(url):
    """Extract the tag text from a playlist page.

    Args:
        url: Address of the playlist page to download.

    Returns:
        A list with every substring found between ``标签:`` and ``,简介``
        in the page HTML; empty when the page contains no such span.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    return _LABELS_RE.findall(_fetch_html(url))


# Step 1: append the first tag string of every listed playlist to result3.txt.
# The `with` block closes the file even if a request fails part-way through.
with open('result3.txt', 'a', encoding='utf-8') as labels_out:
    for page in range(0, 1):  # only the first listing page (35 playlists per page)
        url = 'https://music.163.com/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=' + str(page * 35)
        _names, playlist_ids = get_all_hotSong(url)
        for playlist_id in playlist_ids:
            labels = get_Lables('https://music.163.com/playlist?id=' + playlist_id)
            if labels:
                # Only the first match is recorded.
                labels_out.write(labels[0])
                labels_out.write('\n')

# Step 2: read everything collected so far and count the segmented words.
with open("result3.txt", 'r', encoding='utf-8') as labels_in:
    text = labels_in.read()

counts = Counter()
for token in jieba.lcut(text):
    word = _clean_token(token)
    # Skip empty strings and single-character tokens.
    if len(word) > 1:
        counts[word] += 1

# Step 3: emit every (word, count) pair, most frequent first; ties keep the
# order in which the words were first seen. (An earlier comment mentioned
# "top 15", but no limit was ever applied.)
with open('result4.txt', 'a', encoding='utf-8') as counts_out:
    for word, counter in counts.most_common():
        line = "单词:{},次数:{}".format(word, counter)
        print(line)
        counts_out.write(line)
        counts_out.write('\n')