以Hamlet文本为例,文本下载链接: https://python123.io/resources/pye/hamlet.txt
![]()
#CalHamletV1.py
#hamlet文本下载链接:https://python123.io/resources/pye/hamlet.txt
def getText(path="hamlet.txt", encoding="utf-8"):
    """Return the normalized text of a file.

    The file content is lower-cased and every character in the
    punctuation set below is replaced by a single space, so that the
    result can be split into words on whitespace. The apostrophe is not
    part of the set and is therefore left in place.

    Args:
        path: File to read. Defaults to "hamlet.txt" in the current
            working directory.
        encoding: Text encoding used to decode the file.

    Returns:
        The normalized text as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the file handle is closed.
    with open(path, "r", encoding=encoding) as f:
        txt = f.read().lower()  # lower-case every letter
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_{|}`~'
    # One translate pass maps each punctuation character to a space.
    table = str.maketrans(punctuation, " " * len(punctuation))
    return txt.translate(table)
hamletTxt = getText()
words = hamletTxt.split()  # split the text on whitespace into individual words
counts = {}
for word in words:
    # dict.get supplies the default 0 when the word has not been seen yet
    counts[word] = counts.get(word, 0) + 1
items = list(counts.items())  # list of (word, count) pairs, so it can be sorted
# sort tip: key (here a lambda) selects which element of each pair is the
# sort key; the default order is ascending, reverse=True makes it descending.
items.sort(key=lambda x: x[1], reverse=True)
# Print the 10 most frequent words. Slicing instead of indexing range(10)
# avoids an IndexError when the text has fewer than 10 distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))
CalHamletV1 Code