Skip to content

Commit 08abe66

Browse files
Nicolas PitreJunio C Hamano
Nicolas Pitre
authored and
Junio C Hamano
committed
split the diff-delta interface
This patch splits the diff-delta interface into index creation and delta generation. A wrapper is provided to preserve the diff-delta() call. This will allow for an optimization in pack-objects.c where the source object could be fixed and a full window of objects tentatively tried against that same source object without recomputing the source index each time. This patch only restructure things, plus a couple cleanups for good measure. There is no performance change yet. Signed-off-by: Nicolas Pitre <[email protected]>
1 parent 36932ea commit 08abe66

File tree

3 files changed

+155
-92
lines changed

3 files changed

+155
-92
lines changed

delta.h

+68-7
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,81 @@
11
#ifndef DELTA_H
22
#define DELTA_H
33

4-
/* handling of delta buffers */
5-
extern void *diff_delta(void *from_buf, unsigned long from_size,
6-
void *to_buf, unsigned long to_size,
7-
unsigned long *delta_size, unsigned long max_size);
8-
extern void *patch_delta(void *src_buf, unsigned long src_size,
9-
void *delta_buf, unsigned long delta_size,
4+
/* opaque object for delta index */
5+
struct delta_index;
6+
7+
/*
8+
* create_delta_index: compute index data from given buffer
9+
*
10+
* This returns a pointer to a struct delta_index that should be passed to
11+
* subsequent create_delta() calls, or to free_delta_index(). A NULL pointer
12+
* is returned on failure. The given buffer must not be freed nor altered
13+
* before free_delta_index() is called. The returned pointer must be freed
14+
* using free_delta_index().
15+
*/
16+
extern struct delta_index *
17+
create_delta_index(const void *buf, unsigned long bufsize);
18+
19+
/*
20+
* free_delta_index: free the index created by create_delta_index()
21+
*/
22+
extern void free_delta_index(struct delta_index *index);
23+
24+
/*
25+
* create_delta: create a delta from given index for the given buffer
26+
*
27+
* This function may be called multiple times with different buffers using
28+
* the same delta_index pointer. If max_delta_size is non-zero and the
29+
* resulting delta is to be larger than max_delta_size then NULL is returned.
30+
* On success, a non-NULL pointer to the buffer with the delta data is
31+
* returned and *delta_size is updated with its size. The returned buffer
32+
* must be freed by the caller.
33+
*/
34+
extern void *
35+
create_delta(const struct delta_index *index,
36+
const void *buf, unsigned long bufsize,
37+
unsigned long *delta_size, unsigned long max_delta_size);
38+
39+
/*
40+
* diff_delta: create a delta from source buffer to target buffer
41+
*
42+
* If max_delta_size is non-zero and the resulting delta is to be larger
43+
* than max_delta_size then NULL is returned. On success, a non-NULL
44+
* pointer to the buffer with the delta data is returned and *delta_size is
45+
* updated with its size. The returned buffer must be freed by the caller.
46+
*/
47+
static inline void *
48+
diff_delta(const void *src_buf, unsigned long src_bufsize,
49+
const void *trg_buf, unsigned long trg_bufsize,
50+
unsigned long *delta_size, unsigned long max_delta_size)
51+
{
52+
struct delta_index *index = create_delta_index(src_buf, src_bufsize);
53+
if (index) {
54+
void *delta = create_delta(index, trg_buf, trg_bufsize,
55+
delta_size, max_delta_size);
56+
free_delta_index(index);
57+
return delta;
58+
}
59+
return NULL;
60+
}
61+
62+
/*
63+
* patch_delta: recreate target buffer given source buffer and delta data
64+
*
65+
* On success, a non-NULL pointer to the target buffer is returned and
66+
* *trg_bufsize is updated with its size. On failure a NULL pointer is
67+
* returned. The returned buffer must be freed by the caller.
68+
*/
69+
extern void *patch_delta(const void *src_buf, unsigned long src_size,
70+
const void *delta_buf, unsigned long delta_size,
1071
unsigned long *dst_size);
1172

1273
/* the smallest possible delta size is 4 bytes */
1374
#define DELTA_SIZE_MIN 4
1475

1576
/*
1677
* This must be called twice on the delta data buffer, first to get the
17-
* expected reference buffer size, and again to get the result buffer size.
78+
* expected source buffer size, and again to get the target buffer size.
1879
*/
1980
static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
2081
const unsigned char *top)

diff-delta.c

+85-83
Original file line numberDiff line numberDiff line change
@@ -27,53 +27,70 @@
2727
/* block size: min = 16, max = 64k, power of 2 */
2828
#define BLK_SIZE 16
2929

30-
#define MIN(a, b) ((a) < (b) ? (a) : (b))
30+
/* maximum hash entry list for the same hash bucket */
31+
#define HASH_LIMIT 64
3132

3233
#define GR_PRIME 0x9e370001
3334
#define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift))
3435

35-
struct index {
36+
struct index_entry {
3637
const unsigned char *ptr;
3738
unsigned int val;
38-
struct index *next;
39+
struct index_entry *next;
3940
};
4041

41-
static struct index ** delta_index(const unsigned char *buf,
42-
unsigned long bufsize,
43-
unsigned long trg_bufsize,
44-
unsigned int *hash_shift)
42+
struct delta_index {
43+
const void *src_buf;
44+
unsigned long src_size;
45+
unsigned int hash_shift;
46+
struct index_entry *hash[0];
47+
};
48+
49+
struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
4550
{
46-
unsigned int i, hsize, hshift, hlimit, entries, *hash_count;
47-
const unsigned char *data;
48-
struct index *entry, **hash;
51+
unsigned int i, hsize, hshift, entries, *hash_count;
52+
const unsigned char *data, *buffer = buf;
53+
struct delta_index *index;
54+
struct index_entry *entry, **hash;
4955
void *mem;
5056

57+
if (!buf || !bufsize)
58+
return NULL;
59+
5160
/* determine index hash size */
5261
entries = bufsize / BLK_SIZE;
5362
hsize = entries / 4;
5463
for (i = 4; (1 << i) < hsize && i < 31; i++);
5564
hsize = 1 << i;
5665
hshift = 32 - i;
57-
*hash_shift = hshift;
5866

5967
/* allocate lookup index */
60-
mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry));
68+
mem = malloc(sizeof(*index) +
69+
sizeof(*hash) * hsize +
70+
sizeof(*entry) * entries);
6171
if (!mem)
6272
return NULL;
73+
index = mem;
74+
mem = index + 1;
6375
hash = mem;
64-
entry = mem + hsize * sizeof(*hash);
76+
mem = hash + hsize;
77+
entry = mem;
78+
79+
index->src_buf = buf;
80+
index->src_size = bufsize;
81+
index->hash_shift = hshift;
6582
memset(hash, 0, hsize * sizeof(*hash));
6683

6784
/* allocate an array to count hash entries */
6885
hash_count = calloc(hsize, sizeof(*hash_count));
6986
if (!hash_count) {
70-
free(hash);
87+
free(index);
7188
return NULL;
7289
}
7390

7491
/* then populate the index */
75-
data = buf + entries * BLK_SIZE - BLK_SIZE;
76-
while (data >= buf) {
92+
data = buffer + entries * BLK_SIZE - BLK_SIZE;
93+
while (data >= buffer) {
7794
unsigned int val = adler32(0, data, BLK_SIZE);
7895
i = HASH(val, hshift);
7996
entry->ptr = data;
@@ -91,27 +108,18 @@ static struct index ** delta_index(const unsigned char *buf,
91108
* bucket that would bring us to O(m*n) computing costs (m and n
92109
* corresponding to reference and target buffer sizes).
93110
*
94-
* The more the target buffer is large, the more it is important to
95-
* have small entry lists for each hash buckets. With such a limit
96-
* the cost is bounded to something more like O(m+n).
97-
*/
98-
hlimit = (1 << 26) / trg_bufsize;
99-
if (hlimit < 4*BLK_SIZE)
100-
hlimit = 4*BLK_SIZE;
101-
102-
/*
103-
* Now make sure none of the hash buckets has more entries than
111+
* Make sure none of the hash buckets has more entries than
104112
* we're willing to test. Otherwise we cull the entry list
105113
* uniformly to still preserve a good repartition across
106114
* the reference buffer.
107115
*/
108116
for (i = 0; i < hsize; i++) {
109-
if (hash_count[i] < hlimit)
117+
if (hash_count[i] < HASH_LIMIT)
110118
continue;
111119
entry = hash[i];
112120
do {
113-
struct index *keep = entry;
114-
int skip = hash_count[i] / hlimit / 2;
121+
struct index_entry *keep = entry;
122+
int skip = hash_count[i] / HASH_LIMIT / 2;
115123
do {
116124
entry = entry->next;
117125
} while(--skip && entry);
@@ -120,7 +128,12 @@ static struct index ** delta_index(const unsigned char *buf,
120128
}
121129
free(hash_count);
122130

123-
return hash;
131+
return index;
132+
}
133+
134+
void free_delta_index(struct delta_index *index)
135+
{
136+
free(index);
124137
}
125138

126139
/* provide the size of the copy opcode given the block offset and size */
@@ -131,82 +144,73 @@ static struct index ** delta_index(const unsigned char *buf,
131144
/* the maximum size for any opcode */
132145
#define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff)
133146

134-
void *diff_delta(void *from_buf, unsigned long from_size,
135-
void *to_buf, unsigned long to_size,
136-
unsigned long *delta_size,
137-
unsigned long max_size)
147+
void *
148+
create_delta(const struct delta_index *index,
149+
const void *trg_buf, unsigned long trg_size,
150+
unsigned long *delta_size, unsigned long max_size)
138151
{
139152
unsigned int i, outpos, outsize, hash_shift;
140153
int inscnt;
141154
const unsigned char *ref_data, *ref_top, *data, *top;
142155
unsigned char *out;
143-
struct index *entry, **hash;
144156

145-
if (!from_size || !to_size)
146-
return NULL;
147-
hash = delta_index(from_buf, from_size, to_size, &hash_shift);
148-
if (!hash)
157+
if (!trg_buf || !trg_size)
149158
return NULL;
150159

151160
outpos = 0;
152161
outsize = 8192;
153162
if (max_size && outsize >= max_size)
154163
outsize = max_size + MAX_OP_SIZE + 1;
155164
out = malloc(outsize);
156-
if (!out) {
157-
free(hash);
165+
if (!out)
158166
return NULL;
159-
}
160-
161-
ref_data = from_buf;
162-
ref_top = from_buf + from_size;
163-
data = to_buf;
164-
top = to_buf + to_size;
165167

166168
/* store reference buffer size */
167-
out[outpos++] = from_size;
168-
from_size >>= 7;
169-
while (from_size) {
170-
out[outpos - 1] |= 0x80;
171-
out[outpos++] = from_size;
172-
from_size >>= 7;
169+
i = index->src_size;
170+
while (i >= 0x80) {
171+
out[outpos++] = i | 0x80;
172+
i >>= 7;
173173
}
174+
out[outpos++] = i;
174175

175176
/* store target buffer size */
176-
out[outpos++] = to_size;
177-
to_size >>= 7;
178-
while (to_size) {
179-
out[outpos - 1] |= 0x80;
180-
out[outpos++] = to_size;
181-
to_size >>= 7;
177+
i = trg_size;
178+
while (i >= 0x80) {
179+
out[outpos++] = i | 0x80;
180+
i >>= 7;
182181
}
182+
out[outpos++] = i;
183183

184+
ref_data = index->src_buf;
185+
ref_top = ref_data + index->src_size;
186+
data = trg_buf;
187+
top = trg_buf + trg_size;
188+
hash_shift = index->hash_shift;
184189
inscnt = 0;
185190

186191
while (data < top) {
187192
unsigned int moff = 0, msize = 0;
188-
if (data + BLK_SIZE <= top) {
189-
unsigned int val = adler32(0, data, BLK_SIZE);
190-
i = HASH(val, hash_shift);
191-
for (entry = hash[i]; entry; entry = entry->next) {
192-
const unsigned char *ref = entry->ptr;
193-
const unsigned char *src = data;
194-
unsigned int ref_size = ref_top - ref;
195-
if (entry->val != val)
196-
continue;
197-
if (ref_size > top - src)
198-
ref_size = top - src;
199-
if (ref_size > 0x10000)
200-
ref_size = 0x10000;
201-
if (ref_size <= msize)
202-
break;
203-
while (ref_size-- && *src++ == *ref)
204-
ref++;
205-
if (msize < ref - entry->ptr) {
206-
/* this is our best match so far */
207-
msize = ref - entry->ptr;
208-
moff = entry->ptr - ref_data;
209-
}
193+
struct index_entry *entry;
194+
unsigned int val = adler32(0, data, BLK_SIZE);
195+
i = HASH(val, hash_shift);
196+
for (entry = index->hash[i]; entry; entry = entry->next) {
197+
const unsigned char *ref = entry->ptr;
198+
const unsigned char *src = data;
199+
unsigned int ref_size = ref_top - ref;
200+
if (entry->val != val)
201+
continue;
202+
if (ref_size > top - src)
203+
ref_size = top - src;
204+
if (ref_size > 0x10000)
205+
ref_size = 0x10000;
206+
if (ref_size <= msize)
207+
break;
208+
while (ref_size-- && *src++ == *ref)
209+
ref++;
210+
if (msize < ref - entry->ptr) {
211+
/* this is our best match so far */
212+
msize = ref - entry->ptr;
213+
moff = entry->ptr - ref_data;
210214
}
211215
}
212216

@@ -271,7 +275,6 @@ void *diff_delta(void *from_buf, unsigned long from_size,
271275
out = realloc(out, outsize);
272276
if (!out) {
273277
free(tmp);
274-
free(hash);
275278
return NULL;
276279
}
277280
}
@@ -280,7 +283,6 @@ void *diff_delta(void *from_buf, unsigned long from_size,
280283
if (inscnt)
281284
out[outpos - inscnt - 1] = inscnt;
282285

283-
free(hash);
284286
*delta_size = outpos;
285287
return out;
286288
}

patch-delta.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
#include <string.h>
1414
#include "delta.h"
1515

16-
void *patch_delta(void *src_buf, unsigned long src_size,
17-
void *delta_buf, unsigned long delta_size,
16+
void *patch_delta(const void *src_buf, unsigned long src_size,
17+
const void *delta_buf, unsigned long delta_size,
1818
unsigned long *dst_size)
1919
{
2020
const unsigned char *data, *top;

0 commit comments

Comments
 (0)