Skip to content

Commit

Permalink
[PATCH] Tweak count-delta interface
Browse files Browse the repository at this point in the history
Make count_delta() return the amount copied from the source and the
amount of literal insertion separately, so that later implementations
of heuristics can use them more flexibly.

This does not change the heuristics implemented in
diffcore-rename or diffcore-break in any way.

Signed-off-by: Junio C Hamano <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Junio C Hamano authored and Linus Torvalds committed Jun 3, 2005
1 parent 5b86040 commit 355e76a
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 25 deletions.
30 changes: 16 additions & 14 deletions count-delta.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,18 @@ static unsigned long get_hdr_size(const unsigned char **datap)
/*
* NOTE. We do not _interpret_ delta fully. As an approximation, we
* just count the number of bytes that are copied from the source, and
* the number of literal data bytes that are inserted. Number of
* bytes that are _not_ copied from the source is deletion, and number
* of inserted literal bytes are addition, so sum of them is what we
* return. xdelta can express an edit that copies data inside of the
* destination which originally came from the source. We do not count
* that in the following routine, so we are undercounting the source
* material that remains in the final output that way.
* the number of literal data bytes that are inserted.
*
* The number of bytes that are _not_ copied from the source is the
* deletion, and the number of inserted literal bytes is the addition,
* so their sum is the extent of damage. xdelta can express an edit
* that copies data inside of the destination which originally came
* from the source. We do not count that in the following routine, so
* we are undercounting the source material that remains in the final
* output that way.
*/
unsigned long count_delta(void *delta_buf, unsigned long delta_size)
int count_delta(void *delta_buf, unsigned long delta_size,
unsigned long *src_copied, unsigned long *literal_added)
{
unsigned long copied_from_source, added_literal;
const unsigned char *data, *top;
Expand All @@ -46,7 +49,7 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)

/* the smallest delta size possible is 6 bytes */
if (delta_size < 6)
return UINT_MAX;
return -1;

data = delta_buf;
top = delta_buf + delta_size;
Expand Down Expand Up @@ -83,13 +86,12 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)

/* sanity check */
if (data != top || out != dst_size)
return UINT_MAX;
return -1;

/* delete size is what was _not_ copied from source.
* edit size is that and literal additions.
*/
if (src_size + added_literal < copied_from_source)
/* we ended up overcounting and underflowed */
return 0;
return (src_size - copied_from_source) + added_literal;
*src_copied = copied_from_source;
*literal_added = added_literal;
return 0;
}
3 changes: 2 additions & 1 deletion count-delta.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#ifndef COUNT_DELTA_H
#define COUNT_DELTA_H

unsigned long count_delta(void *, unsigned long);
int count_delta(void *, unsigned long,
unsigned long *src_copied, unsigned long *literal_added);

#endif
15 changes: 11 additions & 4 deletions diffcore-break.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ static int very_different(struct diff_filespec *src,
* want to get the filepair broken.
*/
void *delta;
unsigned long delta_size, base_size;
unsigned long delta_size, base_size, src_copied, literal_added;

if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0; /* leave symlink rename alone */
Expand Down Expand Up @@ -61,10 +61,17 @@ static int very_different(struct diff_filespec *src,
return MAX_SCORE;

/* Estimate the edit size by interpreting delta. */
delta_size = count_delta(delta, delta_size);
if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
free(delta);
return 0;
}
free(delta);
if (delta_size == UINT_MAX)
return 0; /* error in delta computation */

/* Extent of damage */
if (src->size + literal_added < src_copied)
delta_size = 0;
else
delta_size = (src->size - src_copied) + literal_added;

if (base_size < delta_size)
return MAX_SCORE;
Expand Down
15 changes: 11 additions & 4 deletions diffcore-rename.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ static int estimate_similarity(struct diff_filespec *src,
* call into this function in that case.
*/
void *delta;
unsigned long delta_size, base_size;
unsigned long delta_size, base_size, src_copied, literal_added;
int score;

/* We deal only with regular files. Symlink renames are handled
Expand Down Expand Up @@ -174,10 +174,17 @@ static int estimate_similarity(struct diff_filespec *src,
return 0;

/* Estimate the edit size by interpreting delta. */
delta_size = count_delta(delta, delta_size);
free(delta);
if (delta_size == UINT_MAX)
if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
free(delta);
return 0;
}
free(delta);

/* Extent of damage */
if (src->size + literal_added < src_copied)
delta_size = 0;
else
delta_size = (src->size - src_copied) + literal_added;

/*
* Now we will give some score to it. 100% edit gets 0 points
Expand Down
2 changes: 0 additions & 2 deletions diffcore.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
#define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/

#define RENAME_DST_MATCHED 01

struct diff_filespec {
unsigned char sha1[20];
char *path;
Expand Down

0 comments on commit 355e76a

Please sign in to comment.