From a1822d7d89f7133a6c5b99386242c2d82d951a6f Mon Sep 17 00:00:00 2001 From: ddcw Date: Tue, 24 Sep 2024 15:02:54 +0800 Subject: [PATCH] add lz4 --- ibd2sql/ibd2sql.py | 8 ++++- ibd2sql/lz4.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 ibd2sql/lz4.py diff --git a/ibd2sql/ibd2sql.py b/ibd2sql/ibd2sql.py index 4ff3a39..1a09e1a 100644 --- a/ibd2sql/ibd2sql.py +++ b/ibd2sql/ibd2sql.py @@ -5,6 +5,7 @@ from ibd2sql.innodb_page_spaceORxdes import * from ibd2sql.innodb_page_inode import * from ibd2sql.innodb_page_index import * +from ibd2sql import lz4 import sys @@ -69,7 +70,12 @@ def read(self): data = self.f.read(self.PAGESIZE) if data[24:26] == b'\x00\x0e': # 14: 压缩页, 先解压 FIL_PAGE_VERSION,FIL_PAGE_ALGORITHM_V1,FIL_PAGE_ORIGINAL_TYPE_V1,FIL_PAGE_ORIGINAL_SIZE_V1,FIL_PAGE_COMPRESS_SIZE_V1 = struct.unpack('>BBHHH',data[26:34]) - data = data[:24] + struct.pack('>H',FIL_PAGE_ORIGINAL_TYPE_V1) + b'\x00'*8 + data[34:38] + zlib.decompress(data[38:38+FIL_PAGE_COMPRESS_SIZE_V1]) + if FIL_PAGE_ALGORITHM_V1 == 1: + data = data[:24] + struct.pack('>H',FIL_PAGE_ORIGINAL_TYPE_V1) + b'\x00'*8 + data[34:38] + zlib.decompress(data[38:38+FIL_PAGE_COMPRESS_SIZE_V1]) + elif FIL_PAGE_ALGORITHM_V1 == 2: + data = data[:24] + struct.pack('>H',FIL_PAGE_ORIGINAL_TYPE_V1) + b'\x00'*8 + data[34:38] + lz4.decompress(data[38:38+FIL_PAGE_COMPRESS_SIZE_V1],FIL_PAGE_ORIGINAL_SIZE_V1) + else: + pass return data def _init_sql_prefix(self): diff --git a/ibd2sql/lz4.py b/ibd2sql/lz4.py new file mode 100644 index 0000000..32fa40a --- /dev/null +++ b/ibd2sql/lz4.py @@ -0,0 +1,76 @@ +# write by ddcw @https://github.com/ddcw +# lz4解压(fast) +# 参考: https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md + +""" +LZ4 compressed block is composed of sequences. +sequence = token + [length literals] + literals + offset + [length match] + match +token: 1bytes, first 4-bits length of literals + last 4-bits length of match + each field ranges from 0 to 15, when 15, read more 1 bytes for length to add +literals: not-compressed bytes +offset : 从解压后的数据的某个位置开始复制 match长度的数据 +match : 要复制的数据的长度 +""" + +# lz4压缩(TODO) +def compress(bdata): + """ + input: bdata: 要压缩的数据 + return: data: 压缩之后的数据 + """ + return bdata + + +# lz4解压 +def decompress(bdata,decompress_size): + """ + input: + bdata: 压缩数据 + decompress_size : 解压之后的大小 + return: data 解压之后的数据 + 不考虑dict和prefix_size了 + """ + def read_to_less255(tdata,ip): + length = 0 + while True: + #t = struct.unpack('> 4 # literals length + if ll == 15: + tll,ip = read_to_less255(bdata,ip) + ll += tll + data[op:op+ll] = bdata[ip:ip+ll] # literals 不可压缩的部分 + op += ll + ip += ll + if decompress_size-op < 12: + if op == decompress_size: # 解压完了, 因为可能没得后面的match部分 + break + else: + raise ValueError('Invalid lz4 compress data.') + #offset = struct.unpack('