"""Print the most frequent words of 红楼梦.txt and render a masked word cloud.

The script segments the novel with jieba, counts every token that is longer
than one character and is not a stop word, prints the 15 most frequent
tokens, then draws a word cloud shaped by the mask image ``22.png`` and
saves it to ``new.png``.
"""
from collections import Counter

import jieba
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
from wordcloud import WordCloud


def _read_stopwords(path):
    """Return the whitespace-stripped lines of the stop-word file at *path*."""
    # NOTE(review): no encoding is passed (same as the original code), so the
    # platform default applies -- confirm the encoding of the stop-word files.
    with open(path) as handle:
        return [line.strip() for line in handle]


with open("红楼梦.txt", "r", encoding='utf-8') as novel:
    txt = novel.read()

ls1 = _read_stopwords("stopwords_cn.txt")
ls2 = _read_stopwords("stopwords_cn(more).txt")
ls = ls1 + ls2

words = jieba.lcut(txt)

# Count tokens, skipping single characters and stop words. The set gives
# O(1) membership tests; the previous nested loop's `continue` only affected
# the inner loop, so stop words were never actually excluded.
stop_set = set(ls)
counts = Counter(
    word for word in words
    if len(word) != 1 and word not in stop_set
)

# most_common() sorts by count, descending; ties keep first-seen order.
items = counts.most_common()
# Slice instead of indexing 0..14 so a short text cannot raise IndexError.
for word, count in items[:15]:
    print("{0:<10}{1:>5}".format(word, count))

string = ' '.join(words)
print(len(string))

# Open the mask image and convert it to an array.
with Image.open('22.png') as img:
    img_array = np.array(img)

# Extra stop words, i.e. words that should not appear in the cloud. These
# slipped through the earlier preprocessing; remove them to see their effect.
stopword = ['什么', '一个', '我们', '那里', '你们', '如今', '起来', '知道', '这里', '众人', '他们', '出来', '自己', '说道', '听见', '两个', '姑娘', '不好',
            '不知', '只见', '东西', '告诉']
stopword = stopword + ls
print(stopword)

wc = WordCloud(
    background_color='white',
    # NOTE(review): per the wordcloud docs, width/height are ignored when a
    # mask is supplied -- the mask's shape is used instead.
    width=1000,
    height=800,
    mask=img_array,
    font_path='./fonts/simhei.ttf',
    stopwords=set(stopword),  # the documented type is a set of strings
)
wc.generate_from_text(string)  # build the word cloud

# Create the figure before drawing; calling plt.figure() after imshow()
# opened a second, empty window when plt.show() ran.
plt.figure()
plt.imshow(wc)
plt.axis('off')
plt.show()  # display the image
wc.to_file('new.png')  # save the image
相关文章:
- Python词频分析 2022-12-23
- 红楼梦词云分析+词云可视化 2022-12-23
- 如何用Python 制作词云-对1000首古诗做词云分析 2021-11-28
- python 词云 2021-05-06
- Python 词云 2021-05-08
- python-词云 2021-11-28