Skip to content

Commit

Permalink
实现词云图
Browse files Browse the repository at this point in the history
  • Loading branch information
LC044 committed Dec 3, 2023
1 parent e281c08 commit 8abd38d
Show file tree
Hide file tree
Showing 9 changed files with 210 additions and 12 deletions.
4 changes: 2 additions & 2 deletions app/DataBase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
# from . import output
from .misc import Misc
from .msg import Msg

from .msg import MsgType
misc_db = Misc()
msg_db = Msg()
micro_msg_db = MicroMsg()
hard_link_db = HardLink()
__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db']
__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db','MsgType']
77 changes: 75 additions & 2 deletions app/DataBase/msg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os.path
import random
import sqlite3
import threading
import traceback
Expand Down Expand Up @@ -27,16 +28,24 @@ def inner():
return inner


class MsgType:
    """Numeric message-type codes matched against the ``Type`` column of MSG.

    This is intentionally a plain class used as a namespace of constants.
    The ``singleton`` decorator in this module returns a zero-argument
    factory function rather than the class, so decorating this class would
    replace it with that function and make ``MsgType.TEXT`` raise
    ``AttributeError``.
    """

    TEXT = 1
    IMAGE = 3
    EMOJI = 47


class Msg:
def __init__(self):
    """Mark the connection as closed, then call ``init_database()``."""
    # No connection or cursor exists until init_database() creates them.
    self.DB = self.cursor = None
    self.open_flag = False
    self.init_database()

def init_database(self):
def init_database(self, path=None):
global db_path
if not self.open_flag:
if path:
db_path = path
if os.path.exists(db_path):
self.DB = sqlite3.connect(db_path, check_same_thread=False)
# '''创建游标'''
Expand Down Expand Up @@ -102,6 +111,67 @@ def get_message_by_num(self, username_, local_id):
# result.sort(key=lambda x: x[5])
return result

def get_messages_by_type(self, username_, type_):
    """Fetch all messages of one type for a talker, ordered by CreateTime.

    Args:
        username_: value matched against the ``StrTalker`` column.
        type_: value matched against the ``Type`` column
            (for example ``MsgType.TEXT``).

    Returns:
        ``None`` when the database is not open; otherwise the list of rows
        ``(localId, TalkerId, Type, SubType, IsSender, CreateTime, Status,
        StrContent, StrTime, MsgSvrID)`` returned by the query.
    """
    if self.open_flag:
        query = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
from MSG
where StrTalker=? and Type=?
order by CreateTime
'''
        # The module-level lock serialises use of the shared cursor.
        with lock:
            self.cursor.execute(query, [username_, type_])
            return self.cursor.fetchall()
    return None

def get_messages_by_keyword(self, username_, keyword, num=5):
    """Find text messages containing ``keyword`` and pair each with a reply.

    At most ``num`` matching messages are used; when more match, a random
    sample of ``num`` is taken. For each one, the next text message (higher
    ``localId``) sent by the *other* side is looked up as its reply. Matches
    that have no such reply are skipped.

    Args:
        username_: value matched against the ``StrTalker`` column.
        keyword: substring searched for in ``StrContent``.
        num: maximum number of dialogs to return (default 5).

    Returns:
        ``None`` when the database is not open; otherwise a list of pairs
        ``((IsSender, CreateTime, parts, StrTime),
        (IsSender, CreateTime, StrContent, StrTime))`` where ``parts`` is the
        matching message's text split around ``keyword``.
    """
    if not self.open_flag:
        return None
    if not keyword:
        # An empty keyword would match every message, and str.split('')
        # raises ValueError, so there is nothing meaningful to return.
        return []

    keyword_sql = '''
        select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
        from MSG
        where StrTalker=? and Type=1 and StrContent like ?
        order by CreateTime desc
    '''
    # "order by localId" makes "limit 1" pick the nearest following message;
    # without it the row returned is unspecified.
    # NOTE(review): assumes localId grows with time - confirm against schema.
    reply_sql = '''
        select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
        from MSG
        where localId > ? and StrTalker=? and Type=1 and IsSender=?
        order by localId
        limit 1
    '''

    dialogs = []
    # The module-level lock serialises use of the shared cursor.
    with lock:
        self.cursor.execute(keyword_sql, [username_, f'%{keyword}%'])
        messages = self.cursor.fetchall()
        # Compare against num (not a fixed 5) so random.sample is never asked
        # for more items than exist.
        if len(messages) > num:
            messages = random.sample(messages, num)
        for msg in messages:
            # 1 - IsSender selects the opposite side of the conversation.
            self.cursor.execute(reply_sql, [msg[0], username_, 1 - msg[4]])
            reply = self.cursor.fetchone()
            if reply is None:
                # No later message from the other side: not a dialog.
                continue
            dialogs.append((
                (msg[4], msg[5], msg[7].split(keyword), msg[8]),
                (reply[4], reply[5], reply[7], reply[8]),
            ))
    return dialogs

def close(self):
if self.open_flag:
try:
Expand All @@ -123,4 +193,7 @@ def __del__(self):
print(result)
print(result[-1][0])
local_id = result[-1][0]
wxid = 'wxid_0o18ef858vnu22'
pprint(msg.get_message_by_num('wxid_0o18ef858vnu22', local_id))
print(msg.get_messages_by_keyword(wxid, '干嘛'))
pprint(msg.get_messages_by_keyword(wxid, '干嘛')[0])
4 changes: 4 additions & 0 deletions app/analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

from .analysis import Analysis

__all__=['Analysis']
66 changes: 66 additions & 0 deletions app/analysis/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from collections import Counter

from app.DataBase import msg_db, MsgType
from app.person_pc import ContactPC
import jieba
from pyecharts import options as opts
from pyecharts.charts import Pie, WordCloud, Calendar, Bar, Line, Timeline, Grid

charts_width = 800
charts_height = 450
wordcloud_width = 780
wordcloud_height = 720


def wordcloud(wxid):
    """Build word-cloud chart data for the text messages of one contact.

    The text of all ``MsgType.TEXT`` messages is concatenated, segmented with
    jieba, filtered (single-character words and stopwords are dropped) and
    the 100 most frequent words are plotted.

    Args:
        wxid: talker id passed to the message database queries.

    Returns:
        dict with keys ``chart_data`` (chart options dumped by pyecharts),
        ``keyword`` (most frequent word, ``''`` if none), ``max_num`` (its
        count as a string) and ``dialogs`` (result of
        ``msg_db.get_messages_by_keyword`` for that word, ``[]`` if none).
    """
    import os  # local import so this block does not depend on file-level imports

    # get_messages_by_type returns None when the database is not open.
    txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT) or []
    text = ''.join(row[7] for row in txt_messages if row[7])
    total_msg_len = len(text)

    # Resolve the stopword list relative to this file rather than the current
    # working directory, so the result does not depend on where the process
    # was started.
    stopwords_file = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'stopwords.txt'
    )
    with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
        stopwords = set(stopword_file.read().splitlines())

    # Count only words longer than one character that are not stopwords.
    word_count = Counter(
        word for word in jieba.cut(text)
        if len(word) > 1 and word not in stopwords
    )
    text_data = word_count.most_common(100)

    if text_data:
        keyword, max_num = text_data[0]
        dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=5)
    else:
        # No usable words (no messages, or everything filtered out).
        keyword, max_num, dialogs = '', 0, []

    w = (
        WordCloud(init_opts=opts.InitOpts(width=f"{wordcloud_width}px", height=f"{wordcloud_height}px"))
        .add(series_name="聊天文字", data_pair=text_data, word_size_range=[20, 100])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="词云图", subtitle=f"总计{total_msg_len}字",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23)
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )
    return {
        'chart_data': w.dump_options_with_quotes(),
        'keyword': keyword,
        'max_num': str(max_num),
        'dialogs': dialogs
    }


class Analysis:
    """Empty placeholder class; it defines no attributes or methods."""


if __name__ == '__main__':
msg_db.init_database(path='../DataBase/Msg/MSG.db')
w = wordcloud('wxid_0o18ef858vnu22')
print(w)
21 changes: 21 additions & 0 deletions app/data/stopwords.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
wxid
乡村
炸弹
腹肌
Expand Down Expand Up @@ -2518,3 +2531,11 @@ sup
🙄
旺柴
9 changes: 4 additions & 5 deletions app/util/dat2pic.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def get_code(file_path):
code = dat_read[0] ^ pic_head[head_index]
idf_code = dat_read[1] ^ code
head_index = head_index + 1
# if idf_code == pic_head[head_index]:
# dat_file.close()
return head_index, code
if idf_code == pic_head[head_index]:
dat_file.close()
return head_index, code
head_index = head_index + 1
dat_file.close()
print("not jpg, png, gif")
Expand Down Expand Up @@ -64,9 +64,8 @@ def decode_dat(file_path, out_path):
with open(file_path, 'rb') as file_in:
data = file_in.read()
# 对数据进行异或加密/解密
encrypted_data = bytes([byte ^ decode_code for byte in data])
with open(file_outpath, 'wb') as file_out:
file_out.write(encrypted_data)
file_out.write(bytes([byte ^ decode_code for byte in data]))
print(file_path, '->', file_outpath)
return file_outpath

Expand Down
10 changes: 10 additions & 0 deletions app/util/emoji.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# -*- coding: utf-8 -*-
"""
emoji.py
!!!声明:
由于表情包并不属于个人,并且其可能具有版权风险,你只有浏览权没有拥有权
另外访问腾讯API可能会给腾讯服务器造成压力
所以禁止任何人以任何方式修改或间接修改该文件,违者后果自负
"""

import os
import xml.etree.ElementTree as ET

Expand Down
28 changes: 26 additions & 2 deletions app/web_ui/web.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import json

from flask import Flask, render_template
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.globals import ThemeType

from app.DataBase import msg_db
from app.analysis import analysis

app = Flask(__name__)


Expand All @@ -25,7 +30,7 @@ def index():

@app.route("/index")
def index0():
return render_template("index.html")
return render_template("index1.html")


@app.route('/home')
Expand All @@ -41,7 +46,26 @@ def home():

@app.route('/message_num')
def one():
    """Render the word-cloud page and also save a copy of the HTML to disk.

    Opens the message database, builds the word-cloud data for a fixed
    contact, renders ``message_num.html`` once, writes the rendered page to
    ``message_num_test.html`` in the current working directory and returns
    the same page as the response.
    """
    msg_db.init_database(path='../DataBase/Msg/MSG.db')
    # NOTE(review): the contact id is hard-coded; presumably a development
    # placeholder - confirm before relying on this route.
    wxid = 'wxid_lltzaezg38so22'
    world_cloud_data = analysis.wordcloud(wxid)
    # Render once and reuse the result for both the file copy and the response.
    page = render_template('message_num.html', **world_cloud_data)
    with open('message_num_test.html', 'w', encoding='utf-8') as f:
        f.write(page)
    return page


@app.route('/test')
def test():
    """Return the dumped options of a fixed five-bar demo chart."""
    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    chart.add_xaxis(["A", "B", "C", "D", "E"])
    chart.add_yaxis("Series", [5, 20, 36, 10, 75])
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="Flask and Pyecharts Interaction")
    )
    return chart.dump_options_with_quotes()


if __name__ == "__main__":
Expand Down
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,11 @@ python main.py
# 🏆致谢

* PC微信解密工具:[https://github.com/xaoyaoo/PyWxDump](https://github.com/xaoyaoo/PyWxDump)
* 我的得力助手:[ChatGPT](https://chat.openai.com/)

---

> 说明:该项目仅可用于交流学习,禁止任何非法用途,创作者不承担任何责任🙄
> 声明:该项目有且仅有一个目的:留痕——我的数据我做主,前提是“我的数据”其次才是“我做主”,禁止任何人以任何形式将其用于任何非法用途,对于使用该程序所造成的任何后果,创作者不承担任何责任🙄
[![Star History Chart](https://api.star-history.com/svg?repos=LC044/WeChatMsg&type=Date)](https://star-history.com/?utm_source=bestxtools.com#LC044/WeChatMsg&Date)

Expand Down

0 comments on commit 8abd38d

Please sign in to comment.