27
27
/* block size: min = 16, max = 64k, power of 2 */
28
28
#define BLK_SIZE 16
29
29
30
- #define MIN (a , b ) ((a) < (b) ? (a) : (b))
30
+ /* maximum hash entry list for the same hash bucket */
31
+ #define HASH_LIMIT 64
31
32
32
33
#define GR_PRIME 0x9e370001
33
34
#define HASH (v , shift ) (((unsigned int)(v) * GR_PRIME) >> (shift))
34
35
35
- struct index {
36
+ struct index_entry {
36
37
const unsigned char * ptr ;
37
38
unsigned int val ;
38
- struct index * next ;
39
+ struct index_entry * next ;
39
40
};
40
41
41
- static struct index * * delta_index (const unsigned char * buf ,
42
- unsigned long bufsize ,
43
- unsigned long trg_bufsize ,
44
- unsigned int * hash_shift )
42
+ struct delta_index {
43
+ const void * src_buf ;
44
+ unsigned long src_size ;
45
+ unsigned int hash_shift ;
46
+ struct index_entry * hash [0 ];
47
+ };
48
+
49
+ struct delta_index * create_delta_index (const void * buf , unsigned long bufsize )
45
50
{
46
- unsigned int i , hsize , hshift , hlimit , entries , * hash_count ;
47
- const unsigned char * data ;
48
- struct index * entry , * * hash ;
51
+ unsigned int i , hsize , hshift , entries , * hash_count ;
52
+ const unsigned char * data , * buffer = buf ;
53
+ struct delta_index * index ;
54
+ struct index_entry * entry , * * hash ;
49
55
void * mem ;
50
56
57
+ if (!buf || !bufsize )
58
+ return NULL ;
59
+
51
60
/* determine index hash size */
52
61
entries = bufsize / BLK_SIZE ;
53
62
hsize = entries / 4 ;
54
63
for (i = 4 ; (1 << i ) < hsize && i < 31 ; i ++ );
55
64
hsize = 1 << i ;
56
65
hshift = 32 - i ;
57
- * hash_shift = hshift ;
58
66
59
67
/* allocate lookup index */
60
- mem = malloc (hsize * sizeof (* hash ) + entries * sizeof (* entry ));
68
+ mem = malloc (sizeof (* index ) +
69
+ sizeof (* hash ) * hsize +
70
+ sizeof (* entry ) * entries );
61
71
if (!mem )
62
72
return NULL ;
73
+ index = mem ;
74
+ mem = index + 1 ;
63
75
hash = mem ;
64
- entry = mem + hsize * sizeof (* hash );
76
+ mem = hash + hsize ;
77
+ entry = mem ;
78
+
79
+ index -> src_buf = buf ;
80
+ index -> src_size = bufsize ;
81
+ index -> hash_shift = hshift ;
65
82
memset (hash , 0 , hsize * sizeof (* hash ));
66
83
67
84
/* allocate an array to count hash entries */
68
85
hash_count = calloc (hsize , sizeof (* hash_count ));
69
86
if (!hash_count ) {
70
- free (hash );
87
+ free (index );
71
88
return NULL ;
72
89
}
73
90
74
91
/* then populate the index */
75
- data = buf + entries * BLK_SIZE - BLK_SIZE ;
76
- while (data >= buf ) {
92
+ data = buffer + entries * BLK_SIZE - BLK_SIZE ;
93
+ while (data >= buffer ) {
77
94
unsigned int val = adler32 (0 , data , BLK_SIZE );
78
95
i = HASH (val , hshift );
79
96
entry -> ptr = data ;
@@ -91,27 +108,18 @@ static struct index ** delta_index(const unsigned char *buf,
91
108
* bucket that would bring us to O(m*n) computing costs (m and n
92
109
* corresponding to reference and target buffer sizes).
93
110
*
94
- * The more the target buffer is large, the more it is important to
95
- * have small entry lists for each hash buckets. With such a limit
96
- * the cost is bounded to something more like O(m+n).
97
- */
98
- hlimit = (1 << 26 ) / trg_bufsize ;
99
- if (hlimit < 4 * BLK_SIZE )
100
- hlimit = 4 * BLK_SIZE ;
101
-
102
- /*
103
- * Now make sure none of the hash buckets has more entries than
111
+ * Make sure none of the hash buckets has more entries than
104
112
* we're willing to test. Otherwise we cull the entry list
105
113
* uniformly to still preserve a good repartition across
106
114
* the reference buffer.
107
115
*/
108
116
for (i = 0 ; i < hsize ; i ++ ) {
109
- if (hash_count [i ] < hlimit )
117
+ if (hash_count [i ] < HASH_LIMIT )
110
118
continue ;
111
119
entry = hash [i ];
112
120
do {
113
- struct index * keep = entry ;
114
- int skip = hash_count [i ] / hlimit / 2 ;
121
+ struct index_entry * keep = entry ;
122
+ int skip = hash_count [i ] / HASH_LIMIT / 2 ;
115
123
do {
116
124
entry = entry -> next ;
117
125
} while (-- skip && entry );
@@ -120,7 +128,12 @@ static struct index ** delta_index(const unsigned char *buf,
120
128
}
121
129
free (hash_count );
122
130
123
- return hash ;
131
+ return index ;
132
+ }
133
+
134
+ void free_delta_index (struct delta_index * index )
135
+ {
136
+ free (index );
124
137
}
125
138
126
139
/* provide the size of the copy opcode given the block offset and size */
@@ -131,82 +144,73 @@ static struct index ** delta_index(const unsigned char *buf,
131
144
/* the maximum size for any opcode */
132
145
#define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff)
133
146
134
- void * diff_delta ( void * from_buf , unsigned long from_size ,
135
- void * to_buf , unsigned long to_size ,
136
- unsigned long * delta_size ,
137
- unsigned long max_size )
147
+ void *
148
+ create_delta ( const struct delta_index * index ,
149
+ const void * trg_buf , unsigned long trg_size ,
150
+ unsigned long * delta_size , unsigned long max_size )
138
151
{
139
152
unsigned int i , outpos , outsize , hash_shift ;
140
153
int inscnt ;
141
154
const unsigned char * ref_data , * ref_top , * data , * top ;
142
155
unsigned char * out ;
143
- struct index * entry , * * hash ;
144
156
145
- if (!from_size || !to_size )
146
- return NULL ;
147
- hash = delta_index (from_buf , from_size , to_size , & hash_shift );
148
- if (!hash )
157
+ if (!trg_buf || !trg_size )
149
158
return NULL ;
150
159
151
160
outpos = 0 ;
152
161
outsize = 8192 ;
153
162
if (max_size && outsize >= max_size )
154
163
outsize = max_size + MAX_OP_SIZE + 1 ;
155
164
out = malloc (outsize );
156
- if (!out ) {
157
- free (hash );
165
+ if (!out )
158
166
return NULL ;
159
- }
160
-
161
- ref_data = from_buf ;
162
- ref_top = from_buf + from_size ;
163
- data = to_buf ;
164
- top = to_buf + to_size ;
165
167
166
168
/* store reference buffer size */
167
- out [outpos ++ ] = from_size ;
168
- from_size >>= 7 ;
169
- while (from_size ) {
170
- out [outpos - 1 ] |= 0x80 ;
171
- out [outpos ++ ] = from_size ;
172
- from_size >>= 7 ;
169
+ i = index -> src_size ;
170
+ while (i >= 0x80 ) {
171
+ out [outpos ++ ] = i | 0x80 ;
172
+ i >>= 7 ;
173
173
}
174
+ out [outpos ++ ] = i ;
174
175
175
176
/* store target buffer size */
176
- out [outpos ++ ] = to_size ;
177
- to_size >>= 7 ;
178
- while (to_size ) {
179
- out [outpos - 1 ] |= 0x80 ;
180
- out [outpos ++ ] = to_size ;
181
- to_size >>= 7 ;
177
+ i = trg_size ;
178
+ while (i >= 0x80 ) {
179
+ out [outpos ++ ] = i | 0x80 ;
180
+ i >>= 7 ;
182
181
}
182
+ out [outpos ++ ] = i ;
183
183
184
+ ref_data = index -> src_buf ;
185
+ ref_top = ref_data + index -> src_size ;
186
+ data = trg_buf ;
187
+ top = trg_buf + trg_size ;
188
+ hash_shift = index -> hash_shift ;
184
189
inscnt = 0 ;
185
190
186
191
while (data < top ) {
187
192
unsigned int moff = 0 , msize = 0 ;
188
- if (data + BLK_SIZE <= top ) {
189
- unsigned int val = adler32 (0 , data , BLK_SIZE );
190
- i = HASH (val , hash_shift );
191
- for (entry = hash [i ]; entry ; entry = entry -> next ) {
192
- const unsigned char * ref = entry -> ptr ;
193
- const unsigned char * src = data ;
194
- unsigned int ref_size = ref_top - ref ;
195
- if (entry -> val != val )
196
- continue ;
197
- if (ref_size > top - src )
198
- ref_size = top - src ;
199
- if (ref_size > 0x10000 )
200
- ref_size = 0x10000 ;
201
- if (ref_size <= msize )
202
- break ;
203
- while (ref_size -- && * src ++ == * ref )
204
- ref ++ ;
205
- if (msize < ref - entry -> ptr ) {
206
- /* this is our best match so far */
207
- msize = ref - entry -> ptr ;
208
- moff = entry -> ptr - ref_data ;
209
- }
193
+ struct index_entry * entry ;
194
+ unsigned int val = adler32 (0 , data , BLK_SIZE );
195
+ i = HASH (val , hash_shift );
196
+ for (entry = index -> hash [i ]; entry ; entry = entry -> next ) {
197
+ const unsigned char * ref = entry -> ptr ;
198
+ const unsigned char * src = data ;
199
+ unsigned int ref_size = ref_top - ref ;
200
+ if (entry -> val != val )
201
+ continue ;
202
+ if (ref_size > top - src )
203
+ ref_size = top - src ;
204
+ if (ref_size > 0x10000 )
205
+ ref_size = 0x10000 ;
206
+ if (ref_size <= msize )
207
+ break ;
208
+ while (ref_size -- && * src ++ == * ref )
209
+ ref ++ ;
210
+ if (msize < ref - entry -> ptr ) {
211
+ /* this is our best match so far */
212
+ msize = ref - entry -> ptr ;
213
+ moff = entry -> ptr - ref_data ;
210
214
}
211
215
}
212
216
@@ -271,7 +275,6 @@ void *diff_delta(void *from_buf, unsigned long from_size,
271
275
out = realloc (out , outsize );
272
276
if (!out ) {
273
277
free (tmp );
274
- free (hash );
275
278
return NULL ;
276
279
}
277
280
}
@@ -280,7 +283,6 @@ void *diff_delta(void *from_buf, unsigned long from_size,
280
283
if (inscnt )
281
284
out [outpos - inscnt - 1 ] = inscnt ;
282
285
283
- free (hash );
284
286
* delta_size = outpos ;
285
287
return out ;
286
288
}
0 commit comments