Skip to content

Commit

Permalink
binary diff: further updates.
Browse files Browse the repository at this point in the history
This updates the user interface and generated diff data format.

 * "diff --binary" is used to signal that we want an e-mailable
   binary patch.  It implies --full-index and -p.

 * "apply --allow-binary-replacement" acquired a short synonym
   "apply --binary".

 * After the "GIT binary patch\n" header line there is a token
   to record which binary patch mechanism was used, so that we
   can extend it later.  Currently there are two mechanisms
   defined: "literal" and "delta".  The former records the
   deflated postimage and the latter records the deflated delta
   from the preimage to postimage.

   Purely for implementation convenience, I added the length of
   the data before deflation after these "literal/delta" tokens
   (otherwise the decoding side needs to guess and reallocate the
   buffer while inflating).  Improvement patches are very welcome.

Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
Junio C Hamano committed May 5, 2006
1 parent 051308f commit 0660626
Show file tree
Hide file tree
Showing 5 changed files with 304 additions and 96 deletions.
130 changes: 97 additions & 33 deletions apply.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ struct patch {
char *new_name, *old_name, *def_name;
unsigned int old_mode, new_mode;
int is_rename, is_copy, is_new, is_delete, is_binary;
#define BINARY_DELTA_DEFLATED 1
#define BINARY_LITERAL_DEFLATED 2
unsigned long deflate_origlen;
int lines_added, lines_deleted;
int score;
struct fragment *fragments;
Expand Down Expand Up @@ -969,9 +972,11 @@ static inline int metadata_changes(struct patch *patch)

static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
{
/* We have read "GIT binary patch\n"; what follows is a
* sequence of 'length-byte' followed by base-85 encoded
* delta data.
/* We have read "GIT binary patch\n"; what follows is a line
* that says the patch method (currently, either "deflated
* literal" or "deflated delta") and the length of data before
* deflating; a sequence of 'length-byte' followed by base-85
* encoded data follows.
*
* Each 5-byte sequence of base-85 encodes up to 4 bytes,
* and we would limit the patch line to 66 characters,
Expand All @@ -982,11 +987,27 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
*/
int llen, used;
struct fragment *fragment;
char *delta = NULL;
char *data = NULL;

patch->is_binary = 1;
patch->fragments = fragment = xcalloc(1, sizeof(*fragment));
used = 0;

/* Grab the type of patch */
llen = linelen(buffer, size);
used = llen;
linenr++;

if (!strncmp(buffer, "delta ", 6)) {
patch->is_binary = BINARY_DELTA_DEFLATED;
patch->deflate_origlen = strtoul(buffer + 6, NULL, 10);
}
else if (!strncmp(buffer, "literal ", 8)) {
patch->is_binary = BINARY_LITERAL_DEFLATED;
patch->deflate_origlen = strtoul(buffer + 8, NULL, 10);
}
else
return error("unrecognized binary patch at line %d: %.*s",
linenr-1, llen-1, buffer);
buffer += llen;
while (1) {
int byte_length, max_byte_length, newsize;
llen = linelen(buffer, size);
Expand Down Expand Up @@ -1015,16 +1036,16 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
byte_length <= max_byte_length - 4)
goto corrupt;
newsize = fragment->size + byte_length;
delta = xrealloc(delta, newsize);
if (decode_85(delta + fragment->size,
data = xrealloc(data, newsize);
if (decode_85(data + fragment->size,
buffer + 1,
byte_length))
goto corrupt;
fragment->size = newsize;
buffer += llen;
size -= llen;
}
fragment->patch = delta;
fragment->patch = data;
return used;
corrupt:
return error("corrupt binary patch at line %d: %.*s",
Expand Down Expand Up @@ -1425,6 +1446,61 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag)
return offset;
}

static char *inflate_it(const void *data, unsigned long size,
unsigned long inflated_size)
{
z_stream stream;
void *out;
int st;

memset(&stream, 0, sizeof(stream));

stream.next_in = (unsigned char *)data;
stream.avail_in = size;
stream.next_out = out = xmalloc(inflated_size);
stream.avail_out = inflated_size;
inflateInit(&stream);
st = inflate(&stream, Z_FINISH);
if ((st != Z_STREAM_END) || stream.total_out != inflated_size) {
free(out);
return NULL;
}
return out;
}

/*
 * Apply the single binary fragment of "patch" to the contents in "desc".
 *
 * The fragment payload (patch->fragments->{patch,size}) is inflated to
 * patch->deflate_origlen bytes first.  Depending on patch->is_binary:
 *
 *  - BINARY_DELTA_DEFLATED: the inflated bytes are handed to
 *    patch_delta() together with the current desc->buffer, and its
 *    result replaces desc->buffer.
 *  - BINARY_LITERAL_DEFLATED: the inflated bytes themselves become the
 *    new desc->buffer.
 *
 * In both cases the old desc->buffer is freed.  Returns 0 on success
 * with desc->size and desc->alloc updated; returns non-zero when the
 * payload cannot be inflated, the delta does not produce a result, or
 * the patch method is not one of the two known ones (in which case
 * desc is left untouched).
 */
static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
{
	unsigned long dst_size;
	struct fragment *fragment = patch->fragments;
	void *data;
	void *result;

	data = inflate_it(fragment->patch, fragment->size,
			  patch->deflate_origlen);
	if (!data)
		return error("corrupt patch data");

	switch (patch->is_binary) {
	case BINARY_DELTA_DEFLATED:
		result = patch_delta(desc->buffer, desc->size,
				     data,
				     patch->deflate_origlen,
				     &dst_size);
		free(desc->buffer);
		desc->buffer = result;
		/* The inflated delta is only an intermediate. */
		free(data);
		break;
	case BINARY_LITERAL_DEFLATED:
		free(desc->buffer);
		/* Ownership of the inflated buffer moves to desc. */
		desc->buffer = data;
		dst_size = patch->deflate_origlen;
		break;
	default:
		/*
		 * Unknown method: do not fall through with dst_size
		 * uninitialized and the inflated data leaked.
		 */
		free(data);
		return -1;
	}
	if (!desc->buffer)
		return -1;
	desc->size = desc->alloc = dst_size;
	return 0;
}

static int apply_binary(struct buffer_desc *desc, struct patch *patch)
{
const char *name = patch->old_name ? patch->old_name : patch->new_name;
Expand Down Expand Up @@ -1466,18 +1542,20 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
"'%s' but it is not empty", name);
}

if (desc->buffer) {
get_sha1_hex(patch->new_sha1_prefix, sha1);
if (!memcmp(sha1, null_sha1, 20)) {
free(desc->buffer);
desc->alloc = desc->size = 0;
}
get_sha1_hex(patch->new_sha1_prefix, sha1);
if (!memcmp(sha1, null_sha1, 20))
desc->buffer = NULL;
return 0; /* deletion patch */
}

if (has_sha1_file(sha1)) {
/* We already have the postimage */
char type[10];
unsigned long size;

free(desc->buffer);
desc->buffer = read_sha1_file(sha1, type, &size);
if (!desc->buffer)
return error("the necessary postimage %s for "
Expand All @@ -1486,28 +1564,13 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
desc->alloc = desc->size = size;
}
else {
char type[10];
unsigned long src_size, dst_size;
void *src;

get_sha1_hex(patch->old_sha1_prefix, sha1);
src = read_sha1_file(sha1, type, &src_size);
if (!src)
return error("the necessary preimage %s for "
"'%s' cannot be read",
patch->old_sha1_prefix, name);

/* patch->fragment->patch has the delta data and
* we should apply it to the preimage.
/* We have verified desc matches the preimage;
* apply the patch data to it, which is stored
* in the patch->fragments->{patch,size}.
*/
desc->buffer = patch_delta(src, src_size,
(void*) patch->fragments->patch,
patch->fragments->size,
&dst_size);
if (!desc->buffer)
if (apply_binary_fragment(desc, patch))
return error("binary patch does not apply to '%s'",
name);
desc->size = desc->alloc = dst_size;

/* verify that the result matches */
write_sha1_file_prepare(desc->buffer, desc->size, blob_type,
Expand Down Expand Up @@ -2102,7 +2165,8 @@ int main(int argc, char **argv)
diffstat = 1;
continue;
}
if (!strcmp(arg, "--allow-binary-replacement")) {
if (!strcmp(arg, "--allow-binary-replacement") ||
!strcmp(arg, "--binary")) {
allow_binary_replacement = 1;
continue;
}
Expand Down
134 changes: 134 additions & 0 deletions base85.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#include "cache.h"

/*
 * Debug tracing helpers.  DEBUG_85 is explicitly undefined just below,
 * so by default say(), say1() and say2() expand to empty statements.
 * When DEBUG_85 is defined instead, they forward their arguments to
 * fprintf(stderr, ...); the digit in the name is the number of
 * arguments that follow the format string.
 */
#undef DEBUG_85

#ifdef DEBUG_85
#define say(a) fprintf(stderr, a)
#define say1(a,b) fprintf(stderr, a, b)
#define say2(a,b,c) fprintf(stderr, a, b, c)
#else
#define say(a) do {} while(0)
#define say1(a,b) do {} while(0)
#define say2(a,b,c) do {} while(0)
#endif

/*
 * The base85 alphabet: en85[v] is the character that represents the
 * digit value v (0..84).  The table is deliberately a plain character
 * array without a terminating NUL, so that its element count is
 * exactly 85.
 */
static const char en85[] = {
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
'u', 'v', 'w', 'x', 'y', 'z',
'!', '#', '$', '%', '&', '(', ')', '*', '+', '-',
';', '<', '=', '>', '?', '@', '^', '_', '`', '{',
'|', '}', '~'
};

/*
 * Reverse lookup for the base85 alphabet, indexed by character code.
 * An entry holds the digit value plus one; zero means the character
 * is not part of the alphabet.
 */
static char de85[256];

/*
 * Populate de85[] from en85[] on first use.  A non-zero entry for 'Z'
 * serves as the "already initialised" marker, so repeated calls return
 * immediately.
 */
static void prep_base85(void)
{
	int digit = 0;

	if (de85['Z'])
		return;

	while (digit < ARRAY_SIZE(en85)) {
		int letter = en85[digit];

		de85[letter] = digit + 1;
		digit++;
	}
}

/*
 * Decode base85 text into raw bytes.
 *
 * dst:    receives the decoded bytes; must have room for len bytes.
 * buffer: the encoded text; five characters are consumed for every
 *         group of up to four output bytes.
 * len:    the number of decoded bytes to produce.
 *
 * Returns 0 on success.  Returns the result of error() when a
 * character outside the base85 alphabet is met, or when a
 * five-character group encodes a value that does not fit in 32 bits.
 */
int decode_85(char *dst, char *buffer, int len)
{
	prep_base85();

	say2("decode 85 <%.*s>", len/4*5, buffer);
	while (len) {
		unsigned acc = 0;
		int cnt;
		for (cnt = 0; cnt < 5; cnt++, buffer++) {
			int ch = *((unsigned char *)buffer);
			int de = de85[ch];
			if (!de)
				return error("invalid base85 alphabet %c", ch);
			/* de85[] stores value + 1; recover the digit. */
			de--;
			if (cnt == 4) {
				/*
				 * Detect overflow.  The largest
				 * 5-letter possible is "|NsC0" to
				 * encode 0xffffffff, and "|NsC" gives
				 * 0x03030303 at this point (i.e.
				 * 0xffffffff = 0x03030303 * 85).
				 *
				 * buffer points at the fifth letter
				 * here, so the group starts four
				 * characters back.  Fail the decode
				 * rather than carry on with a wrapped
				 * accumulator.
				 */
				if (0x03030303 < acc ||
				    (0x03030303 == acc && de))
					return error("invalid base85 sequence %.5s",
						     buffer - 4);
			}
			acc = acc * 85 + de;
			say1(" <%08x>", acc);
		}
		say1(" %08x", acc);
		/* Emit most significant byte first; the last group may be short. */
		for (cnt = 0; cnt < 4 && len; cnt++, len--) {
			*dst++ = (acc >> 24) & 0xff;
			acc = acc << 8;
		}
	}
	say("\n");

	return 0;
}

/*
 * Encode raw bytes as base85 text.
 *
 * buf:   receives the encoded text followed by a terminating NUL; it
 *        must have room for five characters per started group of four
 *        input bytes, plus one for the NUL.
 * data:  the bytes to encode.
 * bytes: the number of bytes at data.
 *
 * Each group of up to four bytes is packed most significant byte first
 * into a 32-bit value (a short final group is padded with zero bits)
 * and written out as five base85 digits, most significant digit first.
 */
void encode_85(char *buf, unsigned char *data, int bytes)
{
	prep_base85();

	say("encode 85");
	while (bytes) {
		unsigned acc = 0;
		int cnt;
		for (cnt = 0; cnt < 4 && bytes; cnt++, bytes--) {
			/*
			 * Unsigned on purpose: shifting a byte >= 0x80
			 * left by 24 in a signed int would overflow.
			 */
			unsigned ch = *data++;
			acc |= ch << ((3-cnt)*8);
		}
		say1(" %08x", acc);
		/* Least significant digit is produced first, so fill backwards. */
		for (cnt = 0; cnt < 5; cnt++) {
			int val = acc % 85;
			acc /= 85;
			buf[4-cnt] = en85[val];
		}
		buf += 5;
	}
	say("\n");

	*buf = 0;
}

#ifdef DEBUG_85
/*
 * Stand-alone test driver, built only when DEBUG_85 is defined.
 *
 *   -e <text>     encode <text> and print it prefixed by its length
 *                 letter ('A'..'Z' for 1..26 bytes, 'a'..'z' for
 *                 27..52 bytes)
 *   -d <encoded>  decode a string in the format printed by -e
 *   -t            encode the four bytes 0xff 0xff 0xff 0xff
 *
 * Returns 0 on success and 1 on bad usage or undecodable input.
 */
int main(int ac, char **av)
{
	char buf[1024];

	if (ac < 2)
		goto usage;

	if (!strcmp(av[1], "-e")) {
		int len;

		if (ac < 3)
			goto usage;
		len = strlen(av[2]);
		/* The length letter can only express 1..52 bytes. */
		if (len < 1 || 52 < len)
			goto usage;
		encode_85(buf, (unsigned char *)av[2], len);
		/* Must mirror the mapping used by the -d branch below. */
		if (len <= 26) len = len + 'A' - 1;
		else len = len + 'a' - 26 - 1;
		printf("encoded: %c%s\n", len, buf);
		return 0;
	}
	if (!strcmp(av[1], "-d")) {
		int len;

		if (ac < 3)
			goto usage;
		len = *av[2];
		if ('A' <= len && len <= 'Z') len = len - 'A' + 1;
		else if ('a' <= len && len <= 'z') len = len - 'a' + 26 + 1;
		else
			goto usage;
		if (decode_85(buf, av[2]+1, len))
			return 1;
		printf("decoded: %.*s\n", len, buf);
		return 0;
	}
	if (!strcmp(av[1], "-t")) {
		unsigned char t[4] = { 0xff, 0xff, 0xff, 0xff };
		encode_85(buf, t, 4);
		printf("encoded: D%s\n", buf);
		return 0;
	}
usage:
	fprintf(stderr, "usage: %s (-e <text> | -d <encoded> | -t)\n", av[0]);
	return 1;
}
#endif
1 change: 1 addition & 0 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,5 +365,6 @@ extern void setup_pager(void);

/*
 * base85
 *
 * decode_85() decodes base85 text at "line" into "dst" and returns 0
 * on success, non-zero on invalid input.  Note that "linelen" is the
 * number of decoded bytes to produce in dst, not the length of the
 * encoded text.
 *
 * encode_85() encodes "bytes" bytes at "data" into "buf" as
 * NUL-terminated base85 text, five characters per group of up to four
 * input bytes.
 */
int decode_85(char *dst, char *line, int linelen);
void encode_85(char *buf, unsigned char *data, int bytes);

#endif /* CACHE_H */
Loading

0 comments on commit 0660626

Please sign in to comment.