forked from micropython/micropython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlz77.c
91 lines (81 loc) · 3.55 KB
/
lz77.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/*
* Simple LZ77 streaming compressor.
*
* The scheme implemented here doesn't use a hash table and instead does a brute
* force search in the history for a previous string. It is relatively slow
* (but still O(N)) but gives good compression and minimal memory usage. For a
* small history window (eg 256 bytes) it's not too slow and compresses well.
*
* MIT license; Copyright (c) 2021 Damien P. George
*/
#include "uzlib.h"
#include "defl_static.c"
#define MATCH_LEN_MIN (3)
#define MATCH_LEN_MAX (258)
// hist should be a preallocated buffer of hist_max size bytes.
// hist_max should be greater than 0 a power of 2 (ie 1, 2, 4, 8, ...).
// It's possible to pass in hist=NULL, and then the history window will be taken from the
// src passed in to uzlib_lz77_compress (this is useful when not doing streaming compression).
void uzlib_lz77_init(uzlib_lz77_state_t *state, uint8_t *hist, size_t hist_max) {
memset(state, 0, sizeof(uzlib_lz77_state_t));
state->hist_buf = hist;
state->hist_max = hist_max;
state->hist_start = 0;
state->hist_len = 0;
}
// Search back in the history for the maximum match of the given src data,
// with support for searching beyond the end of the history and into the src buffer
// (effectively the history and src buffer are concatenated).
static size_t uzlib_lz77_search_max_match(uzlib_lz77_state_t *state, const uint8_t *src, size_t len, size_t *longest_offset) {
size_t longest_len = 0;
for (size_t hist_search = 0; hist_search < state->hist_len; ++hist_search) {
// Search for a match.
size_t match_len;
for (match_len = 0; match_len <= MATCH_LEN_MAX && match_len < len; ++match_len) {
uint8_t hist;
if (hist_search + match_len < state->hist_len) {
hist = state->hist_buf[(state->hist_start + hist_search + match_len) & (state->hist_max - 1)];
} else {
hist = src[hist_search + match_len - state->hist_len];
}
if (src[match_len] != hist) {
break;
}
}
// Take this match if its length is at least the minimum, and larger than previous matches.
// If the length is the same as the previous longest then take this match as well, because
// this match will be closer (more recent in the history) and take less bits to encode.
if (match_len >= MATCH_LEN_MIN && match_len >= longest_len) {
longest_len = match_len;
*longest_offset = state->hist_len - hist_search;
}
}
return longest_len;
}
// Compress the given chunk of data.
void uzlib_lz77_compress(uzlib_lz77_state_t *state, const uint8_t *src, unsigned len) {
const uint8_t *top = src + len;
while (src < top) {
// Look for a match in the history window.
size_t match_offset = 0;
size_t match_len = uzlib_lz77_search_max_match(state, src, top - src, &match_offset);
// Encode the literal byte or the match.
if (match_len == 0) {
uzlib_literal(state, *src);
match_len = 1;
} else {
uzlib_match(state, match_offset, match_len);
}
// Push the bytes into the history buffer.
size_t mask = state->hist_max - 1;
while (match_len--) {
uint8_t b = *src++;
state->hist_buf[(state->hist_start + state->hist_len) & mask] = b;
if (state->hist_len == state->hist_max) {
state->hist_start = (state->hist_start + 1) & mask;
} else {
++state->hist_len;
}
}
}
}