diff --git a/app/DataBase/__init__.py b/app/DataBase/__init__.py index ec2c05a3..d5c33c0d 100644 --- a/app/DataBase/__init__.py +++ b/app/DataBase/__init__.py @@ -13,9 +13,9 @@ # from . import output from .misc import Misc from .msg import Msg - +from .msg import MsgType misc_db = Misc() msg_db = Msg() micro_msg_db = MicroMsg() hard_link_db = HardLink() -__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db'] +__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db','MsgType'] diff --git a/app/DataBase/msg.py b/app/DataBase/msg.py index f74ef981..091aafca 100644 --- a/app/DataBase/msg.py +++ b/app/DataBase/msg.py @@ -1,4 +1,5 @@ import os.path +import random import sqlite3 import threading import traceback @@ -27,7 +28,12 @@ def inner(): return inner -@singleton +class MsgType: + TEXT = 1 + IMAGE = 3 + EMOJI = 47 + + class Msg: def __init__(self): self.DB = None @@ -35,8 +41,11 @@ def __init__(self): self.open_flag = False self.init_database() - def init_database(self): + def init_database(self, path=None): + global db_path if not self.open_flag: + if path: + db_path = path if os.path.exists(db_path): self.DB = sqlite3.connect(db_path, check_same_thread=False) # '''创建游标''' @@ -102,6 +111,67 @@ def get_message_by_num(self, username_, local_id): # result.sort(key=lambda x: x[5]) return result + def get_messages_by_type(self, username_, type_): + if not self.open_flag: + return None + sql = ''' + select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID + from MSG + where StrTalker=? and Type=? + order by CreateTime + ''' + try: + lock.acquire(True) + self.cursor.execute(sql, [username_, type_]) + result = self.cursor.fetchall() + finally: + lock.release() + return result + + def get_messages_by_keyword(self, username_, keyword, num=5): + if not self.open_flag: + return None + sql = ''' + select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID + from MSG + where StrTalker=? and Type=1 and StrContent like ? + order by CreateTime desc + ''' + temp = [] + try: + lock.acquire(True) + self.cursor.execute(sql, [username_, f'%{keyword}%']) + messages = self.cursor.fetchall() + finally: + lock.release() + if len(messages) > 5: + messages = random.sample(messages, num) + try: + lock.acquire(True) + for msg in messages: + local_id = msg[0] + is_send = msg[4] + sql = ''' + select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID + from MSG + where localId > ? and StrTalker=? and Type=1 and IsSender=? + limit 1 + ''' + self.cursor.execute(sql, [local_id, username_, 1 - is_send]) + temp.append((msg, self.cursor.fetchone())) + finally: + lock.release() + res = [] + for dialog in temp: + msg1 = dialog[0] + msg2 = dialog[1] + res.append(( + (msg1[4], msg1[5], msg1[7].split(keyword), msg1[8]), + (msg2[4], msg2[5], msg2[7], msg2[8]) + )) + + return res + def close(self): if self.open_flag: try: @@ -123,4 +193,7 @@ def __del__(self): print(result) print(result[-1][0]) local_id = result[-1][0] + wxid = 'wxid_0o18ef858vnu22' pprint(msg.get_message_by_num('wxid_0o18ef858vnu22', local_id)) + print(msg.get_messages_by_keyword(wxid, '干嘛')) + pprint(msg.get_messages_by_keyword(wxid, '干嘛')[0]) diff --git a/app/analysis/__init__.py b/app/analysis/__init__.py new file mode 100644 index 00000000..864dad84 --- /dev/null +++ b/app/analysis/__init__.py @@ -0,0 +1,4 @@ + +from .analysis import Analysis + +__all__=['Analysis'] \ No newline at end of file diff --git a/app/analysis/analysis.py b/app/analysis/analysis.py new file mode 100644 index 00000000..5ec1b026 --- /dev/null +++ b/app/analysis/analysis.py @@ -0,0 +1,66 @@ +from collections import Counter + +from app.DataBase import msg_db, MsgType +from app.person_pc import ContactPC +import jieba +from pyecharts import options as opts +from pyecharts.charts import Pie, WordCloud, Calendar, Bar, Line, Timeline, Grid + +charts_width = 800 +charts_height = 450 +wordcloud_width = 780 +wordcloud_height = 720 + + +def wordcloud(wxid): + import jieba + txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT) + text = ''.join(map(lambda x: x[7], txt_messages)) + total_msg_len = len(text) + # 使用jieba进行分词,并加入停用词 + words = jieba.cut(text) + # 统计词频 + word_count = Counter(words) + # 过滤停用词 + stopwords_file = '../data/stopwords.txt' + with open(stopwords_file, "r", encoding="utf-8") as stopword_file: + stopwords = set(stopword_file.read().splitlines()) + filtered_word_count = {word: count for word, count in word_count.items() if len(word) > 1 and word not in stopwords} + + # 转换为词云数据格式 + data = [(word, count) for word, count in filtered_word_count.items()] + # text_data = data + data.sort(key=lambda x: x[1], reverse=True) + + text_data = data[:100] if len(data) > 100 else data + # 创建词云图 + keyword, max_num = text_data[0] + w = ( + WordCloud(init_opts=opts.InitOpts(width=f"{wordcloud_width}px", height=f"{wordcloud_height}px")) + .add(series_name="聊天文字", data_pair=text_data, word_size_range=[20, 100]) + .set_global_opts( + title_opts=opts.TitleOpts( + title=f"词云图", subtitle=f"总计{total_msg_len}字", + title_textstyle_opts=opts.TextStyleOpts(font_size=23) + ), + tooltip_opts=opts.TooltipOpts(is_show=True), + legend_opts=opts.LegendOpts(is_show=False) + ) + ) + # return w.render_embed() + return { + 'chart_data': w.dump_options_with_quotes(), + 'keyword': keyword, + 'max_num': str(max_num), + 'dialogs': msg_db.get_messages_by_keyword(wxid, keyword, num=5) + } + + +class Analysis: + pass + + +if __name__ == '__main__': + msg_db.init_database(path='../DataBase/Msg/MSG.db') + w = wordcloud('wxid_0o18ef858vnu22') + print(w) diff --git a/app/data/stopwords.txt b/app/data/stopwords.txt index 89620bc4..b6ae9eab 100644 --- a/app/data/stopwords.txt +++ b/app/data/stopwords.txt @@ -1,4 +1,17 @@ wxid +就 +说 +啥 +好 +干 +哦 +好 +嗯 +恩 +噢 +喔 +行 +拿 乡村 炸弹 腹肌 @@ -2518,3 +2531,11 @@ sup 他 她 它 +听 +哪 +想 +打 +🙄 +奥 +真 +旺柴 \ No newline at end of file diff --git a/app/util/dat2pic.py b/app/util/dat2pic.py index 4710e7d3..9c7df78d 100644 --- a/app/util/dat2pic.py +++ b/app/util/dat2pic.py @@ -29,9 +29,9 @@ def get_code(file_path): code = dat_read[0] ^ pic_head[head_index] idf_code = dat_read[1] ^ code head_index = head_index + 1 - # if idf_code == pic_head[head_index]: - # dat_file.close() - return head_index, code + if idf_code == pic_head[head_index]: + dat_file.close() + return head_index, code head_index = head_index + 1 dat_file.close() print("not jpg, png, gif") @@ -64,9 +64,8 @@ def decode_dat(file_path, out_path): with open(file_path, 'rb') as file_in: data = file_in.read() # 对数据进行异或加密/解密 - encrypted_data = bytes([byte ^ decode_code for byte in data]) with open(file_outpath, 'wb') as file_out: - file_out.write(encrypted_data) + file_out.write(bytes([byte ^ decode_code for byte in data])) print(file_path, '->', file_outpath) return file_outpath diff --git a/app/util/emoji.py b/app/util/emoji.py index 092e2dc3..d1a2ee91 100644 --- a/app/util/emoji.py +++ b/app/util/emoji.py @@ -1,3 +1,13 @@ +# -*- coding: utf-8 -*- +""" +emoji.py + +!!!声明: +由于表情包并不属于个人,并且其可能具有版权风险,你只有浏览权没有拥有权 +另外访问腾讯API可能会给腾讯服务器造成压力 +所以禁止任何人以任何方式修改或间接修改该文件,违者后果自负 +""" + import os import xml.etree.ElementTree as ET diff --git a/app/web_ui/web.py b/app/web_ui/web.py index 5edb71d8..23adec6e 100644 --- a/app/web_ui/web.py +++ b/app/web_ui/web.py @@ -1,8 +1,13 @@ +import json + from flask import Flask, render_template from pyecharts import options as opts from pyecharts.charts import Bar from pyecharts.globals import ThemeType +from app.DataBase import msg_db +from app.analysis import analysis + app = Flask(__name__) @@ -25,7 +30,7 @@ def index(): @app.route("/index") def index0(): - return render_template("index.html") + return render_template("index1.html") @app.route('/home') @@ -41,7 +46,26 @@ def home(): @app.route('/message_num') def one(): - return "1hello world" + msg_db.init_database(path='../DataBase/Msg/MSG.db') + wxid = 'wxid_0o18ef858vnu22' + # wxid = 'wxid_8piw6sb4hvfm22' + wxid = 'wxid_lltzaezg38so22' + world_cloud_data = analysis.wordcloud(wxid) + # 创建一个简单的柱状图 + with open('message_num_test.html','w',encoding='utf-8') as f: + f.write(render_template('message_num.html', **world_cloud_data)) + return render_template('message_num.html', **world_cloud_data) + + +@app.route('/test') +def test(): + bar = ( + Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT)) + .add_xaxis(["A", "B", "C", "D", "E"]) + .add_yaxis("Series", [5, 20, 36, 10, 75]) + .set_global_opts(title_opts=opts.TitleOpts(title="Flask and Pyecharts Interaction")) + ) + return bar.dump_options_with_quotes() if __name__ == "__main__": diff --git a/readme.md b/readme.md index 22ce7fd3..ddaeeac2 100644 --- a/readme.md +++ b/readme.md @@ -288,10 +288,11 @@ python main.py # 🏆致谢 * PC微信解密工具:[https://github.com/xaoyaoo/PyWxDump](https://github.com/xaoyaoo/PyWxDump) +* 我的得力助手:[ChatGPT](https://chat.openai.com/) --- -> 说明:该项目仅可用于交流学习,禁止任何非法用途,创作者不承担任何责任🙄 +> 声明:该项目有且仅有一个目的:留痕——我的数据我做主,前提是“我的数据”其次才是“我做主”,禁止任何人以任何形式将其用于任何非法用途,对于使用该程序所造成的任何后果,创作者不承担任何责任🙄 [![Star History Chart](https://api.star-history.com/svg?repos=LC044/WeChatMsg&type=Date)](https://star-history.com/?utm_source=bestxtools.com#LC044/WeChatMsg&Date)