简单的爬取页面数据,并生成词云和网络图

需要安装 requests jieba wordcloud networkx matplotlib 包

代码如下

 

 1 # @Author  :whyCai
 2 # @Time    :2020/10/17 10:35
 3 from time import sleep
 4 
 5 import requests,json,jieba,wordcloud,networkx as nx,jieba.posseg as jp
 6 from matplotlib import pyplot as plt
 7 
 8 """
 9 #爬取页面的数据
10 def getCommText():
11     '''
12     爬取页面的数据
13     :return: 
14     '''
15     text = ''
16     url = 'https://xxxxxxx'
17     headers = {'content-type': 'application/json'}
18 
19     for i in range(0,300):
20         data = {"pageIndex": i+1,"xxxx":1}
21         r = requests.post(url, data=json.dumps(data), headers=headers)
22         res = json.loads(r.text)
23         #获取接口的字段值
24         resContent = res['result']['items']
25         lenComm = len(resContent)
26         # 获取接口的字段值
27         for j in range(0,lenComm):
28             # text = text + resContent[j]['content']+' '
29             print(resContent[j]['content'])
30         sleep(0.2)
31     # print(text)
32     # return text
33 getCommText()
34 """
35 
"""
# Generate a word cloud from a text file (disabled example code).

# Read the data; the original open() call leaked the file handle —
# 'with' guarantees it is closed even on error.
with open('xxx.txt', encoding='utf-8') as f:
    text = f.read()
# Segment the Chinese text with jieba, then join with spaces so
# WordCloud can tokenize the result.
txtlist = " ".join(jieba.lcut(text))
# font_path must point to a font with CJK glyphs, otherwise Chinese
# words render as boxes.
w = wordcloud.WordCloud(width=1000,height=700,background_color='white',font_path='msyh.ttc')
w.generate(txtlist)
# Write the rendered word cloud image to disk.
w.to_file('output2-poem.png')
"""
49 
50 
"""
# Build a part-of-speech transition graph (disabled example code).

# `text` is the same value as text = f.read() in the word-cloud section.
# jp.lcut returns POS-tagged tokens; each has .word and .flag (the POS tag).
words = jp.lcut(text)
# MultiDiGraph so repeated tag transitions become parallel edges.
G = nx.MultiDiGraph()
# Register every POS tag as a node (add_node on an existing node is a no-op).
for token in words:
    G.add_node(token.flag)
# Link each tag to the tag that follows it; zip over adjacent pairs
# replaces the index-based range(len(words) - 1) loop.
for prev, nxt in zip(words, words[1:]):
    G.add_edge(prev.flag, nxt.flag)
# Draw the graph.
nx.draw(G, alpha=0.8, with_labels=True, node_color='lightgreen', font_size=36, node_size=999, width=2)
# Display it.
plt.show()
"""
View Code

相关文章:

  • 2021-10-09
  • 2021-12-08
  • 2021-11-04
  • 2022-12-23
  • 2022-01-31
  • 2022-01-05
  • 2021-12-04
  • 2021-11-19
猜你喜欢
  • 2021-07-05
  • 2021-10-02
  • 2021-12-16
  • 2022-02-04
  • 2021-12-12
  • 2021-10-22
  • 2021-05-31
相关资源
相似解决方案