Python词云分析

from collections import Counter

import jieba
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
from wordcloud import WordCloud
 6 txt = (open("红楼梦.txt", "r", encoding=\'utf-8\')).read()
 7 file1 = open("stopwords_cn.txt")
 8 file2 = open("stopwords_cn(more).txt")
 9 ls1 = []
10 while 1:
11     line = file1.readline()
12     new_word = line.strip()
13     if not line:
14         break
15     ls1.append(new_word)
16 ls2 = []
17 while 1:
18     line = file2.readline()
19     new_word = line.strip()
20     if not line:
21         break
22     ls2.append(new_word)
23 ls = ls1+ls2
24 words = jieba.lcut(txt)
25 counts = {}
26 for word in words:
27     for i in ls:
28         if word == i:
29             continue
30     if (len(word)) == 1:
31         continue
32     else:
33         counts[word] = counts.get(word, 0) + 1
34 items = list(counts.items())
35 items.sort(key=lambda x: x[1], reverse=True)
36 for i in range(15):
37     word, count = items[i]
38     print("{0:<10}{1:>5}".format(word, count))
39 string = \' \'.join(words)
40 print(len(string))
41 img = Image.open(\'22.png\') #打开图片
42 img_array = np.array(img) #将图片装换为数组
43 stopword=[\'什么\', \'一个\', \'我们\', \'那里\', \'你们\', \'如今\', \'起来\', \'知道\', \'这里\', \'众人\', \'他们\', \'出来\', \'自己\', \'说道\', \'听见\', \'两个\', \'姑娘\', \'不好\',
44           \'不知\', \'只见\', \'东西\', \'告诉\']  #设置停止词，也就是你不想显示的词，这里这个词是我前期处理没处理好，你可以删掉他看看他的作用
45 stopword=stopword+ls
46 print(stopword)
47 wc = WordCloud(
48     background_color=\'white\',
49     width=1000,
50     height=800,
51     mask=img_array,
52     font_path=\'./fonts/simhei.ttf\',
53     stopwords=stopword
54 )
55 wc.generate_from_text(string)#绘制图片
56 plt.imshow(wc)
57 plt.axis(\'off\')
58 plt.figure()
59 plt.show()  #显示图片
60 wc.to_file(\'new.png\')  #保存图片