# lzy20001230
import re
import urllib.request
import urllib.error
import urllib.parse  
import jieba

def get_all_hotSong(url):
    """Download a playlist listing page and pull out playlist names and ids.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        A ``(names, ids)`` tuple of two lists of strings. ``names`` holds the
        ``title`` attribute of every ``<a title=... href="/playlist?id=...">``
        anchor whose ``class`` attribute starts with ``t``; ``ids`` holds the
        digits following ``playlist?id=`` for every such ``class="t...``
        link. The lists come from two separate scans, so they are only
        guaranteed to line up when every matching link also carries a title.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
    request = urllib.request.Request(url=url, headers=headers)
    # Use the response as a context manager so the connection is always closed.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')

    id_pattern = r'playlist\?id=(\d*?)" class="t'
    result_id = re.findall(id_pattern, html)

    # The title is captured with [^"] rather than '.', so a non-matching
    # anchor earlier on the same line (e.g. class="msk") cannot be swallowed
    # into the captured name.
    name_pattern = r'<a title="([^"]*?)" href="/playlist\?id=\d*?" class="t'
    result_name = re.findall(name_pattern, html)

    return result_name, result_id

def get_Lables(url):
    """Download a playlist page and extract the text between the tag markers.

    Args:
        url: Address of the playlist page to fetch.

    Returns:
        A list with one string per occurrence of text enclosed between the
        literal markers ``标签:`` and ``,简介`` in the page source. The list
        is empty when the markers are not found.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
    request = urllib.request.Request(url=url, headers=headers)
    # Use the response as a context manager so the connection is always closed.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')

    w1 = '标签:'
    w2 = ',简介'
    # re.S lets the captured span run across line breaks.
    pat = re.compile(w1 + '(.*?)' + w2, re.S)
    return pat.findall(html)
    
# Crawl the hot-playlist listing (first page only: offset 0, 35 entries per
# page) and append the first tag string found on each playlist page to
# result3.txt, one per line. The file is opened in append mode, so repeated
# runs accumulate lines.
with open('result3.txt', 'a', encoding='utf-8') as out_file:
    for page in range(0, 1):
        url = 'https://music.163.com/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=' + str(page * 35)
        # Only the ids are needed here; the playlist names are discarded.
        _names, playlist_ids = get_all_hotSong(url)
        for playlist_id in playlist_ids:
            t_url = 'https://music.163.com/playlist?id=' + playlist_id
            labels = get_Lables(t_url)
            # Pages without the tag markers yield an empty list; skip them.
            if labels:
                out_file.write(labels[0])
                out_file.write('\n')

# Tokenise the collected tag text with jieba, count how often each token
# occurs, and append the frequency table (most frequent first) to
# result4.txt while echoing it to stdout.
with open("result3.txt", 'r', encoding='utf-8') as src:
    text = src.read()

counts = {}
for word in jieba.lcut(text):
    # NOTE(review): the original chain also held several replace("", "")
    # calls, which are no-ops; presumably the characters they were meant to
    # strip were lost when the code was copied -- confirm and restore.
    word = word.replace("...", "").strip(' ').strip('\r\n')
    # Ignore empty tokens and single characters.
    if len(word) <= 1:
        continue
    counts[word] = counts.get(word, 0) + 1

items = list(counts.items())
# Descending by count; the sort is stable, so ties keep first-seen order.
items.sort(key=lambda x: x[1], reverse=True)

# Every entry is emitted. An earlier comment spoke of "the top 15", but no
# limit was ever applied, so the full table is kept.
with open('result4.txt', 'a', encoding='utf-8') as f2:
    for word, counter in items:
        line = "单词:{},次数:{}".format(word, counter)
        print(line)
        f2.write(line)
        f2.write('\n')

 

# 分类:

# 技术点:

# 相关文章: