• 列表的操作方法
    易忘知识点总结

  • random库
    易忘知识点总结

  • 字典
    易忘知识点总结

  • 文件

易忘知识点总结

  • format中的格式是:内容 -> 填充 -> 对齐 -> 宽度 为固定格式。同时别忘了最外面有双引号和大括号,以及冒号,"{: }".format()...

  • if-else 条件表达式写作一行的方法: print("True" if type(eval("123")) == type(1) else "False") 注意,无需加冒号

# Basic version of the frequency count.
# Reads 1.txt, counts how often each character occurs, and writes the
# result to 2.txt as comma-separated "character:count" pairs.
with open("1.txt", "r", encoding="utf-8") as fi:
    txt = fi.read()

d = {}
# Iterate over the text that was read, not over the file object: the file
# is already exhausted after read(), so looping over it yields nothing.
for c in txt:
    d[c] = d.get(c, 0) + 1

# Spaces and newlines are not wanted in the statistics. pop() with a default
# avoids a KeyError when the text happens to contain neither.
d.pop(' ', None)
d.pop('\n', None)

ls = ["{}:{}".format(key, count) for key, count in d.items()]

with open("2.txt", "w", encoding="utf-8") as fo:
    fo.write(",".join(ls))

# Intermediate version of the frequency count.
# Same as the basic version, but the text is first segmented with the jieba
# library so that the counted units are jieba tokens instead of characters.
import jieba

with open("1.txt", "r", encoding="utf-8") as fi:
    t = fi.read()

# jieba.lcut splits the text into a list of tokens.
txt = jieba.lcut(t)

d = {}
for c in txt:
    d[c] = d.get(c, 0) + 1

# Drop whitespace tokens; pop() with a default does not fail when the
# segmented text contains no space or newline token.
d.pop(' ', None)
d.pop('\n', None)

ls = ["{}:{}".format(key, count) for key, count in d.items()]

with open("2.txt", "w", encoding="utf-8") as fo:
    fo.write(",".join(ls))

# Advanced version of the frequency count.
# Counts characters in 1.txt, sorts them by descending count, and writes the
# (at most) 100 most frequent ones to 2.txt as "character:count" pairs.
with open("1.txt", "r", encoding="utf-8") as fi:
    txt = fi.read()

d = {}
for c in txt:
    d[c] = d.get(c, 0) + 1

# pop() with a default avoids a KeyError when the text has no space/newline.
d.pop(' ', None)
d.pop('\n', None)

ls = list(d.items())
ls.sort(key=lambda x: x[1], reverse=True)

# Slicing instead of indexing range(100) keeps this safe when the text
# contains fewer than 100 distinct characters.
top = ["{}:{}".format(key, count) for key, count in ls[:100]]

with open("2.txt", "w", encoding="utf-8") as fo:
    fo.write(",".join(top))

#特高级版本
#在上述统计高频词并排序后,比较两个文件中相同的字符

def getList(name):
    """Return the keys stored in a frequency file.

    The file ``name + ".txt"`` is read as UTF-8 and is expected to hold
    comma-separated ``key:count`` entries; only the part of each entry
    before the first colon is kept.

    Args:
        name: Path of the file without its ``.txt`` extension.

    Returns:
        A list with one key per comma-separated entry, in file order.
        An empty file yields ``['']``.
    """
    # The context manager closes the file even if reading fails.
    with open(name + ".txt", "r", encoding="utf-8") as fi:
        entries = fi.read().split(',')
    return [entry.split(':')[0] for entry in entries]


def main():
    """Write the keys common to 1.txt and 2.txt into out.txt.

    Both input files are read with getList(). Keys that appear in both are
    written to out.txt joined by commas, in the order they occur in 1.txt.
    """
    ls1 = getList("1")
    ls2 = getList("2")

    # A set gives constant-time membership tests for the second file's keys.
    in_second = set(ls2)
    ls3 = [c for c in ls1 if c in in_second]

    # Open the output only after both inputs were read successfully, and let
    # the context manager close it.
    with open("out.txt", "w", encoding="utf-8") as fo:
        fo.write(",".join(ls3))


main()

相关文章: