新年新气象,申请了cnblogs,新的一年希望多多更新~
首先是三道leetcode题:
2022. 将一维数组转变成二维数组
class Solution:
    def construct2DArray(self, original: List[int], m: int, n: int) -> List[List[int]]:
        """Reshape the 1-D list `original` into an m x n 2-D list.

        Returns [] when len(original) != m * n, per the problem statement.
        """
        if len(original) != m * n:
            return []
        # Row i is exactly the contiguous slice original[i*n : (i+1)*n];
        # slicing copies, so rows never alias each other (the pitfall the
        # original comment warned about with [[0]*n]*m style construction).
        return [original[i * n:(i + 1) * n] for i in range(m)]
剑指 Offer 09. 用两个栈实现队列
class CQueue {
    // sa receives pushes; sb holds elements in reversed (FIFO) order.
    stack<int> sa, sb;
public:
    // Freshly constructed std::stack members are already empty, so the
    // original drain loops were redundant; nothing to do here.
    CQueue() {}
    // Enqueue: O(1) push onto the input stack.
    void appendTail(int value) {
        sa.push(value);
    }
    // Dequeue: refill sb from sa only when sb runs dry, so each element
    // is moved at most once (amortized O(1)). Returns -1 when empty.
    int deleteHead() {
        if (sb.empty()) {
            while (!sa.empty()) {
                sb.push(sa.top());
                sa.pop();
            }
        }
        if (sb.empty()) {
            return -1;  // both stacks empty: the queue has no elements
        }
        int res = sb.top();
        sb.pop();
        return res;
    }
};
/**
* Your CQueue object will be instantiated and called as such:
* CQueue* obj = new CQueue();
* obj->appendTail(value);
* int param_2 = obj->deleteHead();
*/
剑指 Offer 30. 包含min函数的栈
class MinStack {
    // s holds every value; sm is a non-increasing stack of minimums.
    stack<int> s, sm;
public:
    // Freshly constructed std::stack members are already empty, so the
    // original drain loops were redundant; nothing to do here.
    MinStack() {}
    void push(int x) {
        s.push(x);
        // <= (not <) keeps duplicate minimums on sm, so popping one copy
        // of the minimum does not lose the other.
        if (sm.empty() || x <= sm.top()) {
            sm.push(x);
        }
    }
    // Precondition (problem contract): the stack is non-empty.
    void pop() {
        if (s.top() == sm.top()) {
            sm.pop();  // the current minimum is leaving the stack
        }
        s.pop();
    }
    int top() {
        return s.top();
    }
    // O(1) minimum: always the top of the auxiliary stack.
    int min() {
        return sm.top();
    }
};
/**
* Your MinStack object will be instantiated and called as such:
* MinStack* obj = new MinStack();
* obj->push(x);
* obj->pop();
* int param_3 = obj->top();
* int param_4 = obj->min();
*/
今天搞了一个比较有意思的事情,总结了某个活跃群的年度词汇,效果如下:
Step1:导出微信群的聊天记录,手机usb连接电脑使用itunes可以直接备份
Step2:使用软件wxbackup导出备份中的微信聊天记录
Step3:读取json,jieba分词,wordcloud画图
代码主要参考博客:
https://pythondict.com/python-paintings/python-qixi-wechat-wordcloud/#lwptoc1
https://www.cnblogs.com/huzihu/p/9675304.html
# coding:utf-8
import json
import jieba
import numpy
import codecs
import pandas
import matplotlib.pyplot as plt
from wordcloud import WordCloud
def loadJson():
    """Extract 2021 plain-text messages from the wxbackup dump `message.js`
    and write their contents, one per line, to `聊天记录.txt`.
    """
    # message.js wraps a JSON object in a JS assignment; slice out the
    # outermost {...} before parsing. Encoding assumed UTF-8, matching
    # the rest of the pipeline.
    with open('message.js', encoding='utf-8') as dataFile:
        data = dataFile.read()
    jsonObj = json.loads(data[data.find('{'): data.rfind('}') + 1])
    # `with` guarantees the output file is closed even if a record is
    # malformed (the original leaked the handle on exceptions).
    with open("聊天记录.txt", 'w+', encoding='utf-8') as out:
        for mi in jsonObj["message"]:
            # Type 1 == plain text; timestamp filter keeps only messages
            # sent in 2021 (epoch 1609430400 == 2021-01-01 00:00 UTC+8).
            if mi['m_uiMessageType'] == 1 and mi['m_uiCreateTime'] > 1609430400:
                out.write(mi['m_nsContent'] + '\n')
def load_file_segment():
    """Tokenize `聊天记录.txt` with jieba and return the token list,
    dropping single-character tokens and newline tokens.
    """
    # Load the custom dictionary so domain-specific words are kept intact.
    jieba.load_userdict("mywords.txt")
    # `with open(..., encoding=...)` replaces the legacy codecs.open +
    # manual close pair and closes the file even on error.
    with open("聊天记录.txt", encoding='utf-8') as f:
        content = f.read()
    # Single characters and line-break tokens carry no signal for the
    # word cloud, so filter them out while collecting the segments.
    return [seg for seg in jieba.cut(content)
            if len(seg) != 1 and seg != '\r\n']
def get_words_count_dict():
    """Count word frequencies from the segmented chat log.

    Returns a DataFrame with columns `segment` and `计数`, sorted by
    count in descending order.
    """
    # Wrap the token list in a DataFrame so it can be filtered and grouped.
    seg_df = pandas.DataFrame({'segment': load_file_segment()})
    # Stop-word list: one word per line, no header.
    stopwords = pandas.read_csv("stopwords.txt", index_col=False, quoting=3, sep="\t", names=['stopword'],
                                encoding="utf-8")
    # Keep only tokens that are NOT in the stop-word list.
    seg_df = seg_df[~seg_df.segment.isin(stopwords.stopword)]
    # Group by token and count occurrences into a `计数` column;
    # reset_index keeps `segment` as a regular column for the caller.
    counts = seg_df.groupby(by=['segment'])['segment'].agg([("计数", numpy.size)])
    return counts.reset_index().sort_values(by="计数", ascending=False)
# loadJson()  # run once first to regenerate 聊天记录.txt from message.js
# Build the frequency table and flatten it into a {word: count} mapping.
freq_table = get_words_count_dict()
word_freqs = freq_table.set_index("segment").to_dict()['计数']
# Render the word cloud from the frequency mapping.
cloud = WordCloud(
    background_color='white',  # default background is black
    width=2000, height=1000,
    max_words=200,  # cap on how many words are drawn
    font_path='/System/Library/Fonts/Hiragino Sans GB.ttc',  # CJK-capable font
    max_font_size=140,
    min_font_size=40,
    random_state=100  # fixes the colour/layout randomness
).generate_from_frequencies(word_freqs)
# Display the image without axes, then save it to disk.
plt.imshow(cloud)
plt.axis('off')
plt.show()
cloud.to_file(r"cloud.png")