Skip to content

Commit c3a0282

Browse files
committed
Merge branch 'ds/line-log-on-bloom'
"git log -L..." now takes advantage of the "which paths are touched by this commit?" info stored in the commit-graph system.

* ds/line-log-on-bloom:
  - line-log: integrate with changed-path Bloom filters
  - line-log: try to use generation number-based topo-ordering
  - line-log: more responsive, incremental 'git log -L'
  - t4211-line-log: add tests for parent oids
  - line-log: remove unused fields from 'struct line_log_data'
2 parents 2051400 + f32dde8 commit c3a0282

File tree

6 files changed

+152
-13
lines changed

6 files changed

+152
-13
lines changed

bloom.c

+5
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,11 @@ void fill_bloom_key(const char *data,
138138
key->hashes[i] = hash0 + i * hash1;
139139
}
140140

141+
/*
 * Release the hash array held by 'key'.
 *
 * This is the counterpart of fill_bloom_key(), which populates
 * key->hashes. The array is released through FREE_AND_NULL, which (as
 * the macro name indicates) also resets key->hashes to NULL, so the
 * same 'struct bloom_key' can be filled again afterwards.
 */
void clear_bloom_key(struct bloom_key *key)
142+
{
143+
FREE_AND_NULL(key->hashes);
144+
}
145+
141146
void add_key_to_filter(const struct bloom_key *key,
142147
struct bloom_filter *filter,
143148
const struct bloom_filter_settings *settings)

bloom.h

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ void fill_bloom_key(const char *data,
7272
size_t len,
7373
struct bloom_key *key,
7474
const struct bloom_filter_settings *settings);
75+
void clear_bloom_key(struct bloom_key *key);
7576

7677
void add_key_to_filter(const struct bloom_key *key,
7778
struct bloom_filter *filter,

line-log.c

+40-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "userdiff.h"
1616
#include "line-log.h"
1717
#include "argv-array.h"
18+
#include "bloom.h"
1819

1920
static void range_set_grow(struct range_set *rs, size_t extra)
2021
{
@@ -1146,6 +1147,37 @@ int line_log_print(struct rev_info *rev, struct commit *commit)
11461147
return 1;
11471148
}
11481149

1150+
/*
 * Ask the changed-path Bloom filter of 'commit' whether any of the paths
 * tracked in the 'range' list may have been touched by that commit.
 *
 * Returns:
 *   1  if the commit has to be examined the regular way: it has no
 *      parents, no Bloom filter settings are set on 'rev', no filter
 *      could be obtained for the commit, or the filter reports that at
 *      least one tracked path is contained in it.
 *   0  if 'range' is NULL, or the filter contains none of the tracked
 *      paths.
 *
 * The caller uses a zero return to skip the diff for this commit and to
 * hand the unchanged ranges over to the first parent.
 */
static int bloom_filter_check(struct rev_info *rev,
1151+
struct commit *commit,
1152+
struct line_log_data *range)
1153+
{
1154+
struct bloom_filter *filter;
1155+
struct bloom_key key;
1156+
int result = 0;
1157+
1158+
/* A commit without parents is always reported as "needs processing". */
if (!commit->parents)
1159+
return 1;
1160+
1161+
/* Without settings or a filter there is nothing to consult; be conservative. */
if (!rev->bloom_filter_settings ||
1162+
!(filter = get_bloom_filter(rev->repo, commit, 0)))
1163+
return 1;
1164+
1165+
/* No tracked ranges means no path can match. */
if (!range)
1166+
return 0;
1167+
1168+
/* Walk the list of tracked paths and stop at the first one the filter contains. */
while (!result && range) {
1169+
fill_bloom_key(range->path, strlen(range->path), &key, rev->bloom_filter_settings);
1170+
1171+
if (bloom_filter_contains(filter, &key, rev->bloom_filter_settings))
1172+
result = 1;
1173+
1174+
/* The key is refilled on every iteration, so release it each time. */
clear_bloom_key(&key);
1175+
range = range->next;
1176+
}
1177+
1178+
return result;
1179+
}
1180+
11491181
static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *commit,
11501182
struct line_log_data *range)
11511183
{
@@ -1159,6 +1191,7 @@ static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *c
11591191

11601192
queue_diffs(range, &rev->diffopt, &queue, commit, parent);
11611193
changed = process_all_files(&parent_range, rev, &queue, range);
1194+
11621195
if (parent)
11631196
add_line_range(rev, parent, parent_range);
11641197
free_line_log_data(parent_range);
@@ -1227,13 +1260,17 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm
12271260
/* NEEDSWORK leaking like a sieve */
12281261
}
12291262

1230-
static int process_ranges_arbitrary_commit(struct rev_info *rev, struct commit *commit)
1263+
int line_log_process_ranges_arbitrary_commit(struct rev_info *rev, struct commit *commit)
12311264
{
12321265
struct line_log_data *range = lookup_line_range(rev, commit);
12331266
int changed = 0;
12341267

12351268
if (range) {
1236-
if (!commit->parents || !commit->parents->next)
1269+
if (commit->parents && !bloom_filter_check(rev, commit, range)) {
1270+
struct line_log_data *prange = line_log_data_copy(range);
1271+
add_line_range(rev, commit->parents->item, prange);
1272+
clear_commit_line_range(rev, commit);
1273+
} else if (!commit->parents || !commit->parents->next)
12371274
changed = process_ranges_ordinary_commit(rev, commit, range);
12381275
else
12391276
changed = process_ranges_merge_commit(rev, commit, range);
@@ -1270,7 +1307,7 @@ int line_log_filter(struct rev_info *rev)
12701307
while (list) {
12711308
struct commit_list *to_free = NULL;
12721309
commit = list->item;
1273-
if (process_ranges_arbitrary_commit(rev, commit)) {
1310+
if (line_log_process_ranges_arbitrary_commit(rev, commit)) {
12741311
*pp = list;
12751312
pp = &list->next;
12761313
} else

line-log.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,16 @@ void sort_and_merge_range_set(struct range_set *);
4646
struct line_log_data {
4747
struct line_log_data *next;
4848
char *path;
49-
char status;
5049
struct range_set ranges;
51-
int arg_alloc, arg_nr;
52-
const char **args;
5350
struct diff_filepair *pair;
5451
struct diff_ranges diff;
5552
};
5653

5754
void line_log_init(struct rev_info *rev, const char *prefix, struct string_list *args);
5855

5956
int line_log_filter(struct rev_info *rev);
57+
int line_log_process_ranges_arbitrary_commit(struct rev_info *rev,
58+
struct commit *commit);
6059

6160
int line_log_print(struct rev_info *rev, struct commit *commit);
6261

revision.c

+36-7
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ static const char *term_good;
3939

4040
implement_shared_commit_slab(revision_sources, char *);
4141

42+
static inline int want_ancestry(const struct rev_info *revs);
43+
4244
void show_object_with_name(FILE *out, struct object *obj, const char *name)
4345
{
4446
const char *p;
@@ -687,6 +689,9 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
687689
if (!revs->bloom_filter_settings)
688690
return;
689691

692+
if (!revs->pruning.pathspec.nr)
693+
return;
694+
690695
pi = &revs->pruning.pathspec.items[0];
691696
last_index = pi->len - 1;
692697

@@ -2810,6 +2815,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
28102815
if (revs->diffopt.objfind)
28112816
revs->simplify_history = 0;
28122817

2818+
if (revs->line_level_traverse) {
2819+
if (want_ancestry(revs))
2820+
revs->limited = 1;
2821+
revs->topo_order = 1;
2822+
}
2823+
28132824
if (revs->topo_order && !generation_numbers_enabled(the_repository))
28142825
revs->limited = 1;
28152826

@@ -2829,11 +2840,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
28292840

28302841
revs->diffopt.abbrev = revs->abbrev;
28312842

2832-
if (revs->line_level_traverse) {
2833-
revs->limited = 1;
2834-
revs->topo_order = 1;
2835-
}
2836-
28372843
diff_setup_done(&revs->diffopt);
28382844

28392845
grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED,
@@ -3521,7 +3527,7 @@ int prepare_revision_walk(struct rev_info *revs)
35213527
FOR_EACH_OBJECT_PROMISOR_ONLY);
35223528
}
35233529

3524-
if (revs->pruning.pathspec.nr == 1 && !revs->reflog_info)
3530+
if (!revs->reflog_info)
35253531
prepare_to_use_bloom_filter(revs);
35263532
if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
35273533
commit_list_sort_by_date(&revs->commits);
@@ -3534,7 +3540,14 @@ int prepare_revision_walk(struct rev_info *revs)
35343540
sort_in_topological_order(&revs->commits, revs->sort_order);
35353541
} else if (revs->topo_order)
35363542
init_topo_walk(revs);
3537-
if (revs->line_level_traverse)
3543+
if (revs->line_level_traverse && want_ancestry(revs))
3544+
/*
3545+
* At the moment we can only do line-level log with parent
3546+
* rewriting by performing this expensive pre-filtering step.
3547+
* If parent rewriting is not requested, then we rather
3548+
* perform the line-level log filtering during the regular
3549+
* history traversal.
3550+
*/
35383551
line_log_filter(revs);
35393552
if (revs->simplify_merges)
35403553
simplify_merges(revs);
@@ -3745,6 +3758,22 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
37453758
return commit_ignore;
37463759
if (commit->object.flags & UNINTERESTING)
37473760
return commit_ignore;
3761+
if (revs->line_level_traverse && !want_ancestry(revs)) {
3762+
/*
3763+
* In case of line-level log with parent rewriting
3764+
* prepare_revision_walk() already took care of all line-level
3765+
* log filtering, and there is nothing left to do here.
3766+
*
3767+
* If parent rewriting was not requested, then this is the
3768+
* place to perform the line-level log filtering. Notably,
3769+
* this check, though expensive, must come before the other,
3770+
* cheaper filtering conditions, because the tracked line
3771+
* ranges must be adjusted even when the commit will end up
3772+
* being ignored based on other conditions.
3773+
*/
3774+
if (!line_log_process_ranges_arbitrary_commit(revs, commit))
3775+
return commit_ignore;
3776+
}
37483777
if (revs->min_age != -1 &&
37493778
comparison_date(revs, commit) > revs->min_age)
37503779
return commit_ignore;

t/t4211-line-log.sh

+68
Original file line numberDiff line numberDiff line change
@@ -215,4 +215,72 @@ test_expect_success 'fancy rename following #2' '
215215
test_cmp expect actual
216216
'
217217

218+
# Create the following linear history, where each commit does what its
219+
# subject line promises:
220+
#
221+
# * 66c6410 Modify func2() in file.c
222+
# * 50834e5 Modify other-file
223+
# * fe5851c Modify func1() in file.c
224+
# * 8c7c7dd Add other-file
225+
# * d5f4417 Add func1() and func2() in file.c
226+
test_expect_success 'setup for checking line-log and parent oids' '
227+
git checkout --orphan parent-oids &&
228+
git reset --hard &&
229+
230+
cat >file.c <<-\EOF &&
231+
int func1()
232+
{
233+
return F1;
234+
}
235+
236+
int func2()
237+
{
238+
return F2;
239+
}
240+
EOF
241+
git add file.c &&
242+
test_tick &&
243+
git commit -m "Add func1() and func2() in file.c" &&
244+
245+
echo 1 >other-file &&
246+
git add other-file &&
247+
git commit -m "Add other-file" &&
248+
249+
sed -e "s/F1/F1 + 1/" file.c >tmp &&
250+
mv tmp file.c &&
251+
git commit -a -m "Modify func1() in file.c" &&
252+
253+
echo 2 >other-file &&
254+
git commit -a -m "Modify other-file" &&
255+
256+
sed -e "s/F2/F2 + 2/" file.c >tmp &&
257+
mv tmp file.c &&
258+
git commit -a -m "Modify func2() in file.c" &&
259+
260+
head_oid=$(git rev-parse --short HEAD) &&
261+
prev_oid=$(git rev-parse --short HEAD^) &&
262+
root_oid=$(git rev-parse --short HEAD~4)
263+
'
264+
265+
# Parent oid should be from immediate parent.
266+
test_expect_success 'parent oids without parent rewriting' '
267+
cat >expect <<-EOF &&
268+
$head_oid $prev_oid Modify func2() in file.c
269+
$root_oid Add func1() and func2() in file.c
270+
EOF
271+
git log --format="%h %p %s" --no-patch -L:func2:file.c >actual &&
272+
test_cmp expect actual
273+
'
274+
275+
# Parent oid should be from the most recent ancestor touching func2(),
276+
# i.e. in this case from the root commit.
277+
test_expect_success 'parent oids with parent rewriting' '
278+
cat >expect <<-EOF &&
279+
$head_oid $root_oid Modify func2() in file.c
280+
$root_oid Add func1() and func2() in file.c
281+
EOF
282+
git log --format="%h %p %s" --no-patch -L:func2:file.c --parents >actual &&
283+
test_cmp expect actual
284+
'
285+
218286
test_done

0 commit comments

Comments
 (0)