以 Hamlet 文本为例,统计其中出现次数最多的 10 个单词。文本下载链接: https://python123.io/resources/pye/hamlet.txt

#CalHamletV1.py
#hamlet文本下载链接:https://python123.io/resources/pye/hamlet.txt
def getText():
    """Read ``hamlet.txt`` and return its text normalized for word counting.

    Normalization lowercases every letter and replaces each special
    character listed in ``specials`` with a single space, so that a later
    ``str.split()`` yields bare words. The apostrophe is not in that list,
    so contractions such as "that's" are left intact.

    Returns:
        str: the normalized contents of ``hamlet.txt``, which is looked up
        relative to the current working directory.

    Raises:
        OSError: if ``hamlet.txt`` cannot be opened or read.
    """
    # A context manager closes the file handle deterministically instead of
    # leaving it to the garbage collector.
    # NOTE(review): the platform default encoding is still used, as before;
    # pass an explicit encoding here once the file's encoding is confirmed.
    with open("hamlet.txt", "r") as f:
        txt = f.read()
    txt = txt.lower()
    specials = '!"#$%&()*+,-./:;<=>?@[\\]^_{|}`~'
    # One translate() pass maps every special character to a space; this is
    # equivalent to calling replace() once per character.
    return txt.translate(str.maketrans(dict.fromkeys(specials, " ")))
# Load the normalized text and split it on whitespace into individual words.
hamletTxt = getText()
words = hamletTxt.split()

# Tally how many times each word occurs.
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1  # a word not seen yet starts from 0

# (word, count) pairs ordered by count, highest first. ``key`` selects the
# second element of each pair as the sort column; sorting is ascending by
# default, so ``reverse=True`` is needed for descending order. The sort is
# stable, so words with equal counts keep their first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Print the 10 most frequent words. Slicing (rather than indexing 0..9)
# avoids an IndexError when the text has fewer than 10 distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))
CalHamletV1 Code

相关文章: