Skip to content

Commit

Permalink
combine-diff: coalesce lost lines optimally
Browse files Browse the repository at this point in the history
This replaces the greedy implementation to coalesce lost lines by using
dynamic programming to find the Longest Common Subsequence.

The O(n²) time complexity is obviously bigger than previous
implementation but it can produce shorter diff results (and most likely
easier to read).

List of lost lines is now doubly-linked because we reverse-read it when
reading the direction matrix.

Signed-off-by: Antoine Pelisse <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
apelisse authored and gitster committed Mar 25, 2013
1 parent fa04ae0 commit 99d3206
Show file tree
Hide file tree
Showing 2 changed files with 320 additions and 64 deletions.
255 changes: 191 additions & 64 deletions combine-diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,24 @@ static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr,

/* Lines lost from parent */
struct lline {
struct lline *next;
struct lline *next, *prev;
int len;
unsigned long parent_map;
char line[FLEX_ARRAY];
};

/* Lines lost from current parent (before coalescing) */
struct plost {
struct lline *lost_head, *lost_tail;
int len;
};

/* Lines surviving in the merge result */
struct sline {
struct lline *lost_head, **lost_tail;
struct lline *next_lost;
/* Accumulated and coalesced lost lines */
struct lline *lost;
int lenlost;
struct plost plost;
char *bol;
int len;
/* bit 0 up to (N-1) are on if the parent has this line (i.e.
Expand All @@ -95,34 +103,6 @@ struct sline {
unsigned long *p_lno;
};

static char *grab_blob(const unsigned char *sha1, unsigned int mode,
unsigned long *size, struct userdiff_driver *textconv,
const char *path)
{
char *blob;
enum object_type type;

if (S_ISGITLINK(mode)) {
blob = xmalloc(100);
*size = snprintf(blob, 100,
"Subproject commit %s\n", sha1_to_hex(sha1));
} else if (is_null_sha1(sha1)) {
/* deleted blob */
*size = 0;
return xcalloc(1, 1);
} else if (textconv) {
struct diff_filespec *df = alloc_filespec(path);
fill_filespec(df, sha1, 1, mode);
*size = fill_textconv(textconv, df, &blob);
free_filespec(df);
} else {
blob = read_sha1_file(sha1, &type, size);
if (type != OBJ_BLOB)
die("object '%s' is not a blob!", sha1_to_hex(sha1));
}
return blob;
}

static int match_string_spaces(const char *line1, int len1,
const char *line2, int len2,
long flags)
Expand Down Expand Up @@ -163,36 +143,180 @@ static int match_string_spaces(const char *line1, int len1,
return 0;
}

static void append_lost(struct sline *sline, int n, const char *line, int len, long flags)
enum coalesce_direction { MATCH, BASE, NEW };

/* Coalesce new lines into base by finding LCS */
static struct lline *coalesce_lines(struct lline *base, int *lenbase,
struct lline *new, int lennew,
unsigned long parent, long flags)
{
struct lline *lline;
unsigned long this_mask = (1UL<<n);
if (line[len-1] == '\n')
len--;
int **lcs;
enum coalesce_direction **directions;
struct lline *baseend, *newend = NULL;
int i, j, origbaselen = *lenbase;

/* Check to see if we can squash things */
if (sline->lost_head) {
lline = sline->next_lost;
while (lline) {
if (match_string_spaces(lline->line, lline->len,
line, len, flags)) {
lline->parent_map |= this_mask;
sline->next_lost = lline->next;
return;
if (new == NULL)
return base;

if (base == NULL) {
*lenbase = lennew;
return new;
}

/*
* Coalesce new lines into base by finding the LCS
* - Create the table to run dynamic programing
* - Compute the LCS
* - Then reverse read the direction structure:
* - If we have MATCH, assign parent to base flag, and consume
* both baseend and newend
* - Else if we have BASE, consume baseend
* - Else if we have NEW, insert newend lline into base and
* consume newend
*/
lcs = xcalloc(origbaselen + 1, sizeof(int*));
directions = xcalloc(origbaselen + 1, sizeof(enum coalesce_direction*));
for (i = 0; i < origbaselen + 1; i++) {
lcs[i] = xcalloc(lennew + 1, sizeof(int));
directions[i] = xcalloc(lennew + 1, sizeof(enum coalesce_direction));
directions[i][0] = BASE;
}
for (j = 1; j < lennew + 1; j++)
directions[0][j] = NEW;

for (i = 1, baseend = base; i < origbaselen + 1; i++) {
for (j = 1, newend = new; j < lennew + 1; j++) {
if (match_string_spaces(baseend->line, baseend->len,
newend->line, newend->len, flags)) {
lcs[i][j] = lcs[i - 1][j - 1] + 1;
directions[i][j] = MATCH;
} else if (lcs[i][j - 1] >= lcs[i - 1][j]) {
lcs[i][j] = lcs[i][j - 1];
directions[i][j] = NEW;
} else {
lcs[i][j] = lcs[i - 1][j];
directions[i][j] = BASE;
}
if (newend->next)
newend = newend->next;
}
if (baseend->next)
baseend = baseend->next;
}

for (i = 0; i < origbaselen + 1; i++)
free(lcs[i]);
free(lcs);

/* At this point, baseend and newend point to the end of each lists */
i--;
j--;
while (i != 0 || j != 0) {
if (directions[i][j] == MATCH) {
baseend->parent_map |= 1<<parent;
baseend = baseend->prev;
newend = newend->prev;
i--;
j--;
} else if (directions[i][j] == NEW) {
struct lline *lline;

lline = newend;
/* Remove lline from new list and update newend */
if (lline->prev)
lline->prev->next = lline->next;
else
new = lline->next;
if (lline->next)
lline->next->prev = lline->prev;

newend = lline->prev;
j--;

/* Add lline to base list */
if (baseend) {
lline->next = baseend->next;
lline->prev = baseend;
if (lline->prev)
lline->prev->next = lline;
}
lline = lline->next;
else {
lline->next = base;
base = lline;
}
(*lenbase)++;

if (lline->next)
lline->next->prev = lline;

} else {
baseend = baseend->prev;
i--;
}
}

newend = new;
while (newend) {
struct lline *lline = newend;
newend = newend->next;
free(lline);
}

for (i = 0; i < origbaselen + 1; i++)
free(directions[i]);
free(directions);

return base;
}

static char *grab_blob(const unsigned char *sha1, unsigned int mode,
unsigned long *size, struct userdiff_driver *textconv,
const char *path)
{
char *blob;
enum object_type type;

if (S_ISGITLINK(mode)) {
blob = xmalloc(100);
*size = snprintf(blob, 100,
"Subproject commit %s\n", sha1_to_hex(sha1));
} else if (is_null_sha1(sha1)) {
/* deleted blob */
*size = 0;
return xcalloc(1, 1);
} else if (textconv) {
struct diff_filespec *df = alloc_filespec(path);
fill_filespec(df, sha1, 1, mode);
*size = fill_textconv(textconv, df, &blob);
free_filespec(df);
} else {
blob = read_sha1_file(sha1, &type, size);
if (type != OBJ_BLOB)
die("object '%s' is not a blob!", sha1_to_hex(sha1));
}
return blob;
}

static void append_lost(struct sline *sline, int n, const char *line, int len)
{
struct lline *lline;
unsigned long this_mask = (1UL<<n);
if (line[len-1] == '\n')
len--;

lline = xmalloc(sizeof(*lline) + len + 1);
lline->len = len;
lline->next = NULL;
lline->prev = sline->plost.lost_tail;
if (lline->prev)
lline->prev->next = lline;
else
sline->plost.lost_head = lline;
sline->plost.lost_tail = lline;
sline->plost.len++;
lline->parent_map = this_mask;
memcpy(lline->line, line, len);
lline->line[len] = 0;
*sline->lost_tail = lline;
sline->lost_tail = &lline->next;
sline->next_lost = NULL;
}

struct combine_diff_state {
Expand All @@ -203,7 +327,6 @@ struct combine_diff_state {
int n;
struct sline *sline;
struct sline *lost_bucket;
long flags;
};

static void consume_line(void *state_, char *line, unsigned long len)
Expand Down Expand Up @@ -236,14 +359,13 @@ static void consume_line(void *state_, char *line, unsigned long len)
xcalloc(state->num_parent,
sizeof(unsigned long));
state->sline[state->nb-1].p_lno[state->n] = state->ob;
state->lost_bucket->next_lost = state->lost_bucket->lost_head;
return;
}
if (!state->lost_bucket)
return; /* not in any hunk yet */
switch (line[0]) {
case '-':
append_lost(state->lost_bucket, state->n, line+1, len-1, state->flags);
append_lost(state->lost_bucket, state->n, line+1, len-1);
break;
case '+':
state->sline[state->lno-1].flag |= state->nmask;
Expand Down Expand Up @@ -276,7 +398,6 @@ static void combine_diff(const unsigned char *parent, unsigned int mode,
xpp.flags = flags;
memset(&xecfg, 0, sizeof(xecfg));
memset(&state, 0, sizeof(state));
state.flags = flags;
state.nmask = nmask;
state.sline = sline;
state.lno = 1;
Expand All @@ -298,8 +419,18 @@ static void combine_diff(const unsigned char *parent, unsigned int mode,
struct lline *ll;
sline[lno].p_lno[n] = p_lno;

/* Coalesce new lines */
if (sline[lno].plost.lost_head) {
struct sline *sl = &sline[lno];
sl->lost = coalesce_lines(sl->lost, &sl->lenlost,
sl->plost.lost_head,
sl->plost.len, n, flags);
sl->plost.lost_head = sl->plost.lost_tail = NULL;
sl->plost.len = 0;
}

/* How many lines would this sline advance the p_lno? */
ll = sline[lno].lost_head;
ll = sline[lno].lost;
while (ll) {
if (ll->parent_map & nmask)
p_lno++; /* '-' means parent had it */
Expand All @@ -319,7 +450,7 @@ static int interesting(struct sline *sline, unsigned long all_mask)
/* If some parents lost lines here, or if we have added to
* some parent, it is interesting.
*/
return ((sline->flag & all_mask) || sline->lost_head);
return ((sline->flag & all_mask) || sline->lost);
}

static unsigned long adjust_hunk_tail(struct sline *sline,
Expand Down Expand Up @@ -502,7 +633,7 @@ static int make_hunks(struct sline *sline, unsigned long cnt,
has_interesting = 0;
for (j = i; j < hunk_end && !has_interesting; j++) {
unsigned long this_diff = sline[j].flag & all_mask;
struct lline *ll = sline[j].lost_head;
struct lline *ll = sline[j].lost;
if (this_diff) {
/* This has some changes. Is it the
* same as others?
Expand Down Expand Up @@ -656,7 +787,7 @@ static void dump_sline(struct sline *sline, const char *line_prefix,
int j;
unsigned long p_mask;
struct sline *sl = &sline[lno++];
ll = (sl->flag & no_pre_delete) ? NULL : sl->lost_head;
ll = (sl->flag & no_pre_delete) ? NULL : sl->lost;
while (ll) {
printf("%s%s", line_prefix, c_old);
for (j = 0; j < num_parent; j++) {
Expand Down Expand Up @@ -707,7 +838,7 @@ static void reuse_combine_diff(struct sline *sline, unsigned long cnt,
jmask = (1UL<<j);

for (lno = 0; lno <= cnt; lno++) {
struct lline *ll = sline->lost_head;
struct lline *ll = sline->lost;
sline->p_lno[i] = sline->p_lno[j];
while (ll) {
if (ll->parent_map & jmask)
Expand Down Expand Up @@ -966,10 +1097,6 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,

sline = xcalloc(cnt+2, sizeof(*sline));
sline[0].bol = result;
for (lno = 0; lno <= cnt + 1; lno++) {
sline[lno].lost_tail = &sline[lno].lost_head;
sline[lno].flag = 0;
}
for (lno = 0, cp = result; cp < result + result_size; cp++) {
if (*cp == '\n') {
sline[lno].len = cp - sline[lno].bol;
Expand Down Expand Up @@ -1019,8 +1146,8 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
free(result);

for (lno = 0; lno < cnt; lno++) {
if (sline[lno].lost_head) {
struct lline *ll = sline[lno].lost_head;
if (sline[lno].lost) {
struct lline *ll = sline[lno].lost;
while (ll) {
struct lline *tmp = ll;
ll = ll->next;
Expand Down
Loading

0 comments on commit 99d3206

Please sign in to comment.