Repository: QQBackup/QQ-History-Backup Branch: master Commit: edc6653a7043 Files: 14 Total size: 133.5 KB Directory structure: gitextract_56qtarj0/ ├── .gitignore ├── GUI.py ├── GUI.spec ├── LICENSE ├── QQ_History.py ├── README.md ├── emoticon/ │ └── face_config.json ├── icon.py ├── install.ps1 ├── proto/ │ ├── RichMsg.proto │ ├── RichMsg_pb2.py │ ├── __init__.py │ └── compile └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.txt !requirements.txt *.db *.html *.db-shm *.db-wal *.zip *.exe *.spec !GUI.spec build/ dist/ __pycache__/ qq/ QQ*/ com.tencent.mobileqq/ chatimg/ output_* test_qq_files/ ================================================ FILE: GUI.py ================================================ import tkinter as tk from tkinter import filedialog import tkinter.ttk as ttk from icon import qq_icon_png, github_mark # 防止加载不出图标 import base64 import QQ_History import os import webbrowser from time import sleep class GUI_CONST: TITLE = "QQ聊天记录导出" URL = "https://github.com/Young-Lord/QQ_History_Backup" def Enter(): base_path, qq_self, qq = e1.get(), e2.get(), e3.get() for i in (e4, e5, e6, e7, e8): if i.get() not in i['values']: info.set(f"某个输入框的值{i.get()}不在允许的取值{i['values']}内!") return () group = 1 if e4.get() == '私聊' else 2 emoji = 1 if e5.get() == '新' else 2 dump_all = True if e8.get() == '是' else False with_img = True if e6.get() == '是' else False combine_img = True if e7.get() == '是' else False if (base_path == "" or qq_self == "") or (qq == "" and (not dump_all)): info.set("信息不完整!") return () # info.set("开始导出……") # if dump_all: # info.set("批量导出较慢,请耐心等待……") # 只要界面未更新 用户就看不到 try: config = (base_path, qq_self, qq, group, emoji, with_img, combine_img, dump_all) QQ_History.main(base_path, qq_self, qq, group, emoji, with_img, combine_img, dump_all=dump_all) info.set("导出完成。") except Exception as e: info.set(str(config)+"\r\n"+str(repr(e))) return () def SelectDBPath(): dir = filedialog.askdirectory() base_path_get.set(dir) def SelectImgPath(): dir = filedialog.askdirectory() img_path_get.set(dir) def url(): webbrowser.open_new(GUI_CONST.URL) root = tk.Tk() base_path_get, img_path_get, key_get, info = tk.StringVar( ), tk.StringVar(), tk.StringVar(), tk.StringVar() tmp = open("tmp.png", "wb+") tmp.write(base64.b64decode(qq_icon_png)) tmp.close() root.call('wm', 'iconphoto', root._w,tk.PhotoImage(file='tmp.png')) os.remove("tmp.png") root.title(GUI_CONST.TITLE) ttk.Label(root, text="*com.tencent.mobileqq:").grid(row=0, column=0, sticky="e") e1 = ttk.Entry(root, textvariable=base_path_get) e1.grid(row=0, column=1, columnspan=2, sticky="ew", pady=3) ttk.Button(root, text="选择", command=SelectDBPath, width=5).grid(row=0, column=3) ttk.Label(root, text="*自己QQ号:").grid(row=1, column=0, sticky="e") e2 = ttk.Entry(root) e2.grid(row=1, column=1, columnspan=3, sticky="ew", pady=3) ttk.Label(root, text="导出所有记录:").grid( row=2, column=0, sticky="e") # 每个row属性都得更改,什么离谱布局 e8 = ttk.Combobox(root) e8['values'] = ('是', '否') e8.current(1) e8.grid(row=2, column=1, columnspan=3, sticky="ew", pady=3) ttk.Label(root, text="QQ号/群号:").grid(row=3, column=0, sticky="e") e3 = ttk.Entry(root) e3.grid(row=3, column=1, columnspan=3, sticky="ew", pady=3) ttk.Label(root, text="私聊/群聊:").grid(row=4, column=0, sticky="e") e4 = ttk.Combobox(root) e4['values'] = ('私聊', '群聊') e4.current(0) e4.grid(row=4, column=1, columnspan=3, sticky="ew", pady=3) ttk.Label(root, text="表情版本:").grid(row=5, column=0, sticky="e") e5 = ttk.Combobox(root) e5['values'] = ('新', '旧') e5.current(0) e5.grid(row=5, column=1, columnspan=3, sticky="ew", pady=3) ttk.Label(root, text="导出图片:").grid(row=6, column=0, sticky="e") e6 = ttk.Combobox(root) e6['values'] = ('是', '否') e6.current(0) e6.grid(row=6, column=1, columnspan=3, sticky="ew", pady=3) ttk.Label(root, text="合并图片:").grid(row=7, column=0, sticky="e") e7 = ttk.Combobox(root) e7['values'] = ('是', '否') e7.current(1) e7.grid(row=7, column=1, columnspan=3, sticky="ew", pady=3) root.grid_columnconfigure(2, weight=1) info.set("开始") ttk.Button(root, textvariable=info, command=Enter).grid(row=8, column=1) tmp = open("tmp.png", "wb+") tmp.write(base64.b64decode(github_mark)) tmp.close() github = tk.PhotoImage(file='tmp.png') os.remove("tmp.png") button_img = tk.Button(root, image=github, text='b', command=url, bd=0) button_img.grid(row=9, rowspan=7, column=0, sticky="ws") root.mainloop() ================================================ FILE: GUI.spec ================================================ # -*- mode: python ; coding: utf-8 -*- block_cipher = None a = Analysis( ['GUI.py'], pathex=[], binaries=[], datas=[], hiddenimports=[], hookspath=[], hooksconfig={}, runtime_hooks=[], excludes=[], win_no_prefer_redirects=False, win_private_assemblies=False, cipher=block_cipher, noarchive=False, ) pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) exe = EXE( pyz, a.scripts, a.binaries, a.zipfiles, a.datas, [], name='GUI', debug=False, bootloader_ignore_signals=False, strip=False, upx=True, upx_exclude=[], runtime_tmpdir=None, console=False, disable_windowed_traceback=False, argv_emulation=False, target_arch=None, codesign_identity=None, entitlements_file=None, icon=['icon.ico'], ) ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2023 Young-Lord Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: QQ_History.py ================================================ import hashlib import sqlite3 import time import os import traceback import json import base64 from proto.RichMsg_pb2 import PicRec from proto.RichMsg_pb2 import Elem from proto.RichMsg_pb2 import Msg from proto.RichMsg_pb2 import PttRec from html import escape from tempfile import NamedTemporaryFile _crc64_init = False _crc64_table = [0] * 256 def crc64(s): global _crc64_init if not _crc64_init: for i in range(256): bf = i for j in range(8): if bf & 1 != 0: bf = bf >> 1 ^ -7661587058870466123 else: bf >>= 1 _crc64_table[i] = bf _crc64_init = True v = -1 for i in range(len(s)): v = _crc64_table[(ord(s[i]) ^ v) & 255] ^ v >> 8 return v def tempFilename() -> str: f = NamedTemporaryFile(delete=False) f.close() return f.name def isEmpty(s): if s is None: return True if type(s) == int and s == 0: return True if type(s) == str and s == '': return True return False class QQoutput(): def __init__(self, base_path: str, qq_self: str, emoji: int = 1, with_img: bool = True, combine_img: bool = False): # 真正用到的文件只有[f"{QQ}.db", f"slowtable_{QQ}.db", "kc"],这里我直接合并到一个层级下了 self.IS_TIM = False # TIM会缺少一些字段 self.base_path = base_path if type(qq_self) == int: qq_self = str(qq_self) assert(type(qq_self) == str) self.qq_self: str = qq_self # 自己的QQ号 self.uin_to_username = {} self.troopuin_to_troopname = {} self.troopuin_to_troopmembers = {} self.init_paths() self.init_key() # 解密用的密钥 self.c1 = sqlite3.connect(self.db_main_path).cursor() try: self.c2 = sqlite3.connect(self.db_slow_path).cursor() except: pass self.detect_TIM() self.init_friend_list() self.init_troop_list() # self.qq: str = qq # 导出对象的QQ号 # self.mode = mode # 1为私聊,2为群聊 assert(emoji in (1, 2)) self.emoji = emoji # 1为新表情,2为旧表情 assert(type(with_img) == bool) self.with_img = with_img # True为生成图片,False为不生成图片 assert(type(combine_img) == bool) self.combine_img = combine_img # True为将图片嵌入HTML文件中,False为在HTML中存储图片的相对路径 # self.num_to_name = {} # 双重映射,即self.troop_members_name[群号][发言人QQ号] self.troop_members_name = {} self.emoji_map = self.map_new_emoji() @staticmethod def getDisplayName(friend: list) -> str: if isEmpty(friend[1]): ans = friend[2] else: ans = friend[1] return ans @staticmethod def getSafePath(ans: str) -> str: ban_words = "\\ / : * ? \" ' < > | $ \r \n".replace( ' ', '') ban_strips = "#/~" while True: ans_bak = ans for i in ban_words: ans = ans.replace(i, "") for i in ban_strips: ans = ans.strip(i) if ans == ans_bak: # 多次匹配 break return ans def detect_TIM(self): try: self.fill_cursor("select troopRemark from TroopInfoV2") except sqlite3.OperationalError: self.IS_TIM = True print("检测到 TIM,部分功能可能缺失!") else: self.IS_TIM = False return self.IS_TIM def mydecrypt(self, data): # 综合一下 s = self.fix(data, 1) if s != "": return s return self.decrypt(data) def fix(self, data, mode): # msgdata mode=0 # other mode=1 # https://github.com/roadwide/qqmessageoutput/blob/master/q.py # decrypt处理Emoji时会出问题,而这个不会 if (mode == 0): rowbyte = [] # 这么做是为了解决汉字的utf-8是三字节 for i in range(0, len(data)): rowbyte.append(data[i] ^ ord(self.key[i % len(self.key)])) rowbyte = bytes(rowbyte) try: msg = rowbyte.decode(encoding='utf-8') except: msg = "" return msg elif (mode == 1): str = '' try: j = 0 for i in range(0, len(data)): # 获取unicode码 unicode = ord(data[i]) # 如果大于ffff 处理emoji if (unicode > 0xffff): # 分为2个10位二进制与两个密码进行异或 code = unicode ^ ( (ord(self.key[i+j % len(self.key)]) << 10) + ord(self.key[i+j+1 % len(self.key)])) str += chr(code) j = j + 1 else: str += chr(ord(data[i]) ^ ord(self.key[i+j % len(self.key)])) except: str = "" return str def decrypt(self, data, msg_type=-1000): # fix处理**一些东西**会出问题,这个不会 try: msg = b'' if type(data) == bytes: msg = b'' for i in range(0, len(data)): msg += bytes([data[i] ^ ord(self.key[i % len(self.key)])]) elif type(data) == str: msg = '' for i in range(0, len(data)): msg += chr(ord(data[i]) ^ ord(self.key[i % len(self.key)])) return msg if msg_type == -1000 or msg_type == -1049 or msg_type == -1051: try: return escape(msg.decode('utf-8')) except: # print(msg) pass return '[decode error]' if not self.with_img: return None elif msg_type == -2000: return self.decode_pic(msg) elif msg_type == -1035: return self.decode_mix_msg(msg) elif msg_type == -5008: return self.decode_share_url(msg) elif msg_type == -5012 or msg_type == -5018: return '[戳一戳]' elif msg_type == -2002: # 语音消息 return self.decode_silk(msg) except: return f'[解码失败({msg_type})]' # for debug return '[unknown msg_type {}]'.format(msg_type) # return '' def add_emoji(self, msg): pos = msg.find('\x14') while pos != -1: lastpos = pos num = ord(msg[pos + 1]) if str(num) in self.emoji_map: index = self.emoji_map[str(num)] if self.emoji == 1: filename = "new/s" + index + ".png" else: filename = "old/" + index + ".gif" emoticon_path = os.path.join('emoticon', filename) if self.combine_img: try: emoticon_path = self.get_base64_from_pic(emoticon_path) except: pass msg = msg.replace( msg[pos:pos + 2], '{}'.format(emoticon_path, index)) else: msg = msg.replace(msg[pos:pos + 2], '[emoji:{}]'.format(str(num))) pos = msg.find('\x14') if pos == lastpos: break return msg def message(self, qq: str, mode: int): # mode=1 friend # mode=2 troop num = qq.encode("utf-8") md5num = hashlib.md5(num).hexdigest().upper() if mode == 1: cmd = "select msgData,senderuin,time,msgtype from mr_friend_{}_New order by time".format( md5num) # self.get_friends() else: cmd = "select msgData,senderuin,time,msgtype from mr_troop_{}_New order by time".format( md5num) # print('Groups {} -> {}'.format(num, md5num)) self.get_troop_members(qq) cursor = self.fill_cursor(cmd) allmsg = [] for row in cursor: msgdata: bytes = row[0] if not msgdata: continue uin = row[1] ltime = time.localtime(row[2]) sendtime = time.strftime("%Y-%m-%d %H:%M:%S", ltime) msg_type = row[3] msg_final = self.decrypt(msgdata, msg_type) if msg_final is None: continue allmsg.append( [sendtime, msg_type, self.decrypt(uin), msg_final]) return allmsg def get_friends(self): raise NotImplementedError def get_troop_members(self, qq: str): self.troopuin_to_troopmembers[qq] = {} cmd = "SELECT troopuin, memberuin, autoremark, troopnick, friendnick, recommendRemark, mUniqueTitle FROM TroopMemberInfo" cursor = self.fill_cursor(cmd) for row in cursor: if self.fix(row[0], 1) != qq: continue num = self.fix(row[1], 1) names = [self.fix(i, 1) for i in row[2:6]] # 2是你给好友的备注,3是好友的群昵称,4是好友名字,5是好友的群昵称,mUniqueTitle是群头衔 # xxx 我不知道这个顺序怎么搞的 一部分是猜 try: final_name = [i for i in names[1:] if not isEmpty(i)][0] except IndexError: try: final_name = names[0] except IndexError: print(f"{qq}群中{num}好友无法匹配名字。names={names}") print("开Issue!") if num in self.uin_to_username: # 是你对话过的人 if not isEmpty(names[0]): final_name = names[0] else: # print(names) # print("↑你这个好友怎么没有备注的?开Issue!") pass self.troopuin_to_troopmembers[qq][num] = final_name # print([self.fix(i, 1) for i in row[2:6]]) if not isEmpty(row[6]): # 添加头衔 self.troopuin_to_troopmembers[qq][num] = f"【{row[6]}】" + \ self.troopuin_to_troopmembers[qq][num] def _fill_cursors(self, cmd): cursors = [] # slowtable might not contain related message, so just skip it try: cursors.append(self.c2.execute(cmd)) except: pass try: cursors.append(self.c1.execute(cmd)) except sqlite3.OperationalError: pass return cursors def fill_cursor(self, cmd): cursors = self._fill_cursors(cmd) ans = [] for cs in cursors: for row in cs: ans.append(row) return ans def output(self, qq: str, mode: int, output_path: str = "."): self.outut_path = output_path if type(qq) == int: qq = str(qq) assert(type(qq) == str) assert(mode in (1, 2)) name1 = "我" fileprefix = "" if mode == 1: fileprefix = "私聊" filebasename = self.getSafePath(self.uin_to_username.get(qq, str(qq))) if qq not in self.uin_to_username: print(f"警告:{qq}无法在好友列表内找到,请检查聊天类型是否填写正确") else: fileprefix = "群聊" filebasename = self.getSafePath(self.troopuin_to_troopname.get(qq, str(qq))) if qq not in self.troopuin_to_troopname: print(f"警告:{qq}无法在群聊列表内找到,请检查聊天类型是否填写正确") file = f"{fileprefix}-{filebasename}-{qq}.html" file = os.path.join(output_path, file) allmsg = self.message(qq, mode) if len(allmsg) == 0: print(f"{qq}_{mode}没有聊天记录,跳过。") return f2 = open(file, "w", encoding="utf-8") f2.write( "" ) f2.write("
") if mode == 1: table = self.uin_to_username else: table = self.troopuin_to_troopmembers.get(qq, {}) for ts, _, uid, msg in allmsg: if not msg: continue if uid == str(self.qq_self): f2.write("

") f2.write("") f2.write(ts) f2.write("-----") f2.write(name1) f2.write("
") else: f2.write("

") f2.write("") f2.write(escape("{}({})".format( table.get(uid, "???未知???"), uid))) f2.write("-----") f2.write(ts) f2.write("
") f2.write(self.add_emoji(msg)) f2.write("

") f2.write("

") f2.write("
") f2.close() print("导出已完成。文件目录:" + file) def init_key(self): kc_file = open(self.kc_path, "r") self.key = kc_file.read().strip('\r \n') kc_file.close() def init_paths(self): join = os.path.join mainb = self.qq_self + ".db" slowb = "slowtable_" + self.qq_self + ".db" db_main_paths = [mainb, join("databases", mainb), join("db", mainb)] db_slow_paths = [slowb, join("databases", slowb), join("db", slowb)] kc_paths = ["kc", join("files", "kc"), join("f", "kc")] self.kc_path = self.db_main_path = self.db_slow_path = None for i in db_main_paths: current_file = join(self.base_path, i) if os.path.isfile(current_file): self.db_main_path = current_file for i in db_slow_paths: current_file = join(self.base_path, i) if os.path.isfile(current_file): self.db_slow_path = current_file for i in kc_paths: current_file = join(self.base_path, i) if os.path.isfile(current_file): self.kc_path = current_file if self.kc_path is None or self.db_main_path is None: # 很少记录的号没有slowtable,故不判断 raise FileNotFoundError( f"无法找到目标文件!\n路径:{self.base_path}\n当前匹配列表:{[self.kc_path, self.db_main_path, self.db_slow_path]}") def init_friend_list(self): self.FriendsData = [] # uin-QQ号,remark-备注,name-昵称 execute = "select uin,remark,name from Friends" cursor = self.fill_cursor(execute) for i in cursor: uin, remark, name = i[0], i[1], i[2] decode_uin = self.mydecrypt(uin) decode_remark = self.mydecrypt(remark) decode_name = self.mydecrypt(name) friend = [decode_uin, decode_remark, decode_name] self.FriendsData.append(friend) self.uin_to_username[decode_uin] = self.getDisplayName(friend) def init_troop_list(self): self.TroopsData = [] # troopuin-群号,troopRemark-群备注,troopname-群名 execute = "select troopuin,troopRemark,troopname from TroopInfoV2" if self.IS_TIM: execute = execute.replace( "troopRemark", "troopname") # TIM无法给群聊设备注 cursor = self.fill_cursor(execute) for i in cursor: uin, remark, name = i[0], i[1], i[2] # print([self.fix(ii,1) for ii in i]) decode_uin = self.mydecrypt(uin) decode_remark = self.mydecrypt(remark) decode_name = self.mydecrypt(name) troop = [decode_uin, decode_remark, decode_name] self.TroopsData.append(troop) # print(troop) self.troopuin_to_troopname[decode_uin] = self.getDisplayName(troop) def map_new_emoji(self): with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), './emoticon/face_config.json'), encoding='utf-8') as f: # 这个地方可能会在打包的时候出问题 emojis = json.load(f) new_emoji_map = {} for e in emojis['sysface']: if self.emoji == 1: new_emoji_map[e["AQLid"]] = e["QSid"] else: if len(e["EMCode"]) == 3: new_emoji_map[e["AQLid"]] = str(int(e["EMCode"]) - 100) return new_emoji_map def get_base64_from_pic(self, path): with open(path, "rb") as image_file: return (b'data:image/png;base64,' + base64.b64encode(image_file.read())).decode("utf-8") def decode_pic(self, data): try: doc = PicRec() doc.ParseFromString(data) url = 'chatimg:' + doc.md5 filename = hex(crc64(url)) filename = 'Cache_' + filename.replace('0x', '') chatimg_basepath = os.path.join(self.base_path, "chatimg") if not os.path.isdir(chatimg_basepath): chatimg_basepath = "chatimg" rel_path = os.path.join(chatimg_basepath, filename[-3:], filename) if os.path.exists(rel_path): print(rel_path) w = 'auto' if doc.uint32_thumb_width == 0 else str( doc.uint32_thumb_width) h = 'auto' if doc.uint32_thumb_height == 0 else str( doc.uint32_thumb_height) if self.combine_img: rel_path = self.get_base64_from_pic(rel_path) return ''.format(rel_path, w, h) # 最后这里必须用相对路径 except Exception as e: pass return '[图片]' def decode_mix_msg(self, data): try: doc = Msg() doc.ParseFromString(data) message = '' for elem in doc.elems: if elem.picMsg: message += self.decode_pic(elem.picMsg) else: message += escape(elem.textMsg.decode('utf-8')) return message except: pass return '[混合消息]' def decode_silk(self, data): # TODO try: import pilk import av av.logging.set_level(av.logging.ERROR) doc = PttRec() doc.ParseFromString(data) print(doc.sttText) voiceLength = doc.voiceLength # 以秒为单位 filename = doc.localPath[doc.localPath.find("/ptt/")+5:] ptt_basepath = os.path.join(self.base_path, "ptt") if not os.path.isdir(ptt_basepath): ptt_basepath = "ptt" if not os.path.isdir(ptt_basepath): return '[语音消息](目录不存在)' rel_path = os.path.join(ptt_basepath, filename) if not os.path.exists(rel_path): p = [".amr", ".slk"] if rel_path.endswith(p[0]) and os.path.exists(rel_path[:-4]+p[1]): # 试着更改后缀匹配 filename = filename[:-4]+p[1] rel_path = rel_path[:-4]+p[1] elif rel_path.endswith(p[1]) and os.path.exists(rel_path[:-4]+p[0]): filename = filename[:-4]+p[0] rel_path = rel_path[:-4]+p[0] else: # 摆了! return f"[语音消息](文件{rel_path}不存在)" voice_path = os.path.join(self.outut_path, "voice") if not os.path.exists(voice_path): os.makedirs(voice_path) pcm = tempFilename() pilk.decode(rel_path, pcm) absolute_output = os.path.join(voice_path, filename[:-4]+".mp3") relative_output = os.path.join("voice", filename[:-4]+".mp3") rate=24000# pilk源码写的,不管了 with av.open(pcm,format='s16le',options={'ar':str(rate),'ac':'1'}) as in_container: in_stream = in_container.streams.audio[0] with av.open(absolute_output, 'w') as out_container: out_stream = out_container.add_stream( 'mp3', rate=rate, layout='mono' ) try: for frame in in_container.decode(in_stream): frame.pts = None for packet in out_stream.encode(frame): out_container.mux(packet) except Exception as ee: raise ee pass os.remove(pcm) return '