Skip to content

Commit de6dda0

Browse files
committedJul 30, 2020
Merge branch 'sg/commit-graph-cleanups' into master
The changed-path Bloom filter is improved using ideas from an independent implementation. * sg/commit-graph-cleanups: commit-graph: simplify write_commit_graph_file() #2 commit-graph: simplify write_commit_graph_file() #1 commit-graph: simplify parse_commit_graph() #2 commit-graph: simplify parse_commit_graph() #1 commit-graph: clean up #includes diff.h: drop diff_tree_oid() & friends' return value commit-slab: add a function to deep free entries on the slab commit-graph-format.txt: all multi-byte numbers are in network byte order commit-graph: fix parsing the Chunk Lookup table tree-walk.c: don't match submodule entries for 'submod/anything'
2 parents 47ae905 + 7fbfe07 commit de6dda0

13 files changed

+117
-108
lines changed
 

‎Documentation/technical/commit-graph-format.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning
3232
of the body. The header includes certain values, such as number of chunks
3333
and hash type.
3434

35-
All 4-byte numbers are in network order.
35+
All multi-byte numbers are in network byte order.
3636

3737
HEADER:
3838

‎commit-graph.c

+48-64
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
#include "cache.h"
2-
#include "config.h"
3-
#include "dir.h"
41
#include "git-compat-util.h"
2+
#include "config.h"
53
#include "lockfile.h"
64
#include "pack.h"
75
#include "packfile.h"
@@ -285,8 +283,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
285283
const unsigned char *data, *chunk_lookup;
286284
uint32_t i;
287285
struct commit_graph *graph;
288-
uint64_t last_chunk_offset;
289-
uint32_t last_chunk_id;
286+
uint64_t next_chunk_offset;
290287
uint32_t graph_signature;
291288
unsigned char graph_version, hash_version;
292289

@@ -326,24 +323,26 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
326323
graph->data = graph_map;
327324
graph->data_len = graph_size;
328325

329-
last_chunk_id = 0;
330-
last_chunk_offset = 8;
326+
if (graph_size < GRAPH_HEADER_SIZE +
327+
(graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
328+
GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
329+
error(_("commit-graph file is too small to hold %u chunks"),
330+
graph->num_chunks);
331+
free(graph);
332+
return NULL;
333+
}
334+
331335
chunk_lookup = data + 8;
336+
next_chunk_offset = get_be64(chunk_lookup + 4);
332337
for (i = 0; i < graph->num_chunks; i++) {
333338
uint32_t chunk_id;
334-
uint64_t chunk_offset;
339+
uint64_t chunk_offset = next_chunk_offset;
335340
int chunk_repeated = 0;
336341

337-
if (data + graph_size - chunk_lookup <
338-
GRAPH_CHUNKLOOKUP_WIDTH) {
339-
error(_("commit-graph chunk lookup table entry missing; file may be incomplete"));
340-
goto free_and_return;
341-
}
342-
343342
chunk_id = get_be32(chunk_lookup + 0);
344-
chunk_offset = get_be64(chunk_lookup + 4);
345343

346344
chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
345+
next_chunk_offset = get_be64(chunk_lookup + 4);
347346

348347
if (chunk_offset > graph_size - the_hash_algo->rawsz) {
349348
error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
@@ -362,8 +361,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
362361
case GRAPH_CHUNKID_OIDLOOKUP:
363362
if (graph->chunk_oid_lookup)
364363
chunk_repeated = 1;
365-
else
364+
else {
366365
graph->chunk_oid_lookup = data + chunk_offset;
366+
graph->num_commits = (next_chunk_offset - chunk_offset)
367+
/ graph->hash_len;
368+
}
367369
break;
368370

369371
case GRAPH_CHUNKID_DATA:
@@ -417,15 +419,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
417419
error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
418420
goto free_and_return;
419421
}
420-
421-
if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
422-
{
423-
graph->num_commits = (chunk_offset - last_chunk_offset)
424-
/ graph->hash_len;
425-
}
426-
427-
last_chunk_id = chunk_id;
428-
last_chunk_offset = chunk_offset;
429422
}
430423

431424
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@@ -1586,17 +1579,22 @@ static int write_graph_chunk_base(struct hashfile *f,
15861579
return 0;
15871580
}
15881581

1582+
struct chunk_info {
1583+
uint32_t id;
1584+
uint64_t size;
1585+
};
1586+
15891587
static int write_commit_graph_file(struct write_commit_graph_context *ctx)
15901588
{
15911589
uint32_t i;
15921590
int fd;
15931591
struct hashfile *f;
15941592
struct lock_file lk = LOCK_INIT;
1595-
uint32_t chunk_ids[MAX_NUM_CHUNKS + 1];
1596-
uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1];
1593+
struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
15971594
const unsigned hashsz = the_hash_algo->rawsz;
15981595
struct strbuf progress_title = STRBUF_INIT;
15991596
int num_chunks = 3;
1597+
uint64_t chunk_offset;
16001598
struct object_id file_hash;
16011599
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
16021600

@@ -1644,51 +1642,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
16441642
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
16451643
}
16461644

1647-
chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
1648-
chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
1649-
chunk_ids[2] = GRAPH_CHUNKID_DATA;
1645+
chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
1646+
chunks[0].size = GRAPH_FANOUT_SIZE;
1647+
chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
1648+
chunks[1].size = hashsz * ctx->commits.nr;
1649+
chunks[2].id = GRAPH_CHUNKID_DATA;
1650+
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
16501651
if (ctx->num_extra_edges) {
1651-
chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES;
1652+
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
1653+
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
16521654
num_chunks++;
16531655
}
16541656
if (ctx->changed_paths) {
1655-
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES;
1657+
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
1658+
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
16561659
num_chunks++;
1657-
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA;
1660+
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
1661+
chunks[num_chunks].size = sizeof(uint32_t) * 3
1662+
+ ctx->total_bloom_filter_data_size;
16581663
num_chunks++;
16591664
}
16601665
if (ctx->num_commit_graphs_after > 1) {
1661-
chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE;
1666+
chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
1667+
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
16621668
num_chunks++;
16631669
}
16641670

1665-
chunk_ids[num_chunks] = 0;
1666-
1667-
chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
1668-
chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
1669-
chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
1670-
chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
1671-
1672-
num_chunks = 3;
1673-
if (ctx->num_extra_edges) {
1674-
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
1675-
4 * ctx->num_extra_edges;
1676-
num_chunks++;
1677-
}
1678-
if (ctx->changed_paths) {
1679-
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
1680-
sizeof(uint32_t) * ctx->commits.nr;
1681-
num_chunks++;
1682-
1683-
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
1684-
sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size;
1685-
num_chunks++;
1686-
}
1687-
if (ctx->num_commit_graphs_after > 1) {
1688-
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
1689-
hashsz * (ctx->num_commit_graphs_after - 1);
1690-
num_chunks++;
1691-
}
1671+
chunks[num_chunks].id = 0;
1672+
chunks[num_chunks].size = 0;
16921673

16931674
hashwrite_be32(f, GRAPH_SIGNATURE);
16941675

@@ -1697,13 +1678,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
16971678
hashwrite_u8(f, num_chunks);
16981679
hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
16991680

1681+
chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
17001682
for (i = 0; i <= num_chunks; i++) {
17011683
uint32_t chunk_write[3];
17021684

1703-
chunk_write[0] = htonl(chunk_ids[i]);
1704-
chunk_write[1] = htonl(chunk_offsets[i] >> 32);
1705-
chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
1685+
chunk_write[0] = htonl(chunks[i].id);
1686+
chunk_write[1] = htonl(chunk_offset >> 32);
1687+
chunk_write[2] = htonl(chunk_offset & 0xffffffff);
17061688
hashwrite(f, chunk_write, 12);
1689+
1690+
chunk_offset += chunks[i].size;
17071691
}
17081692

17091693
if (ctx->report_progress) {

‎commit-graph.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
#define COMMIT_GRAPH_H
33

44
#include "git-compat-util.h"
5-
#include "repository.h"
6-
#include "string-list.h"
7-
#include "cache.h"
85
#include "object-store.h"
96
#include "oidset.h"
107

@@ -23,6 +20,9 @@ void git_test_write_commit_graph_or_die(void);
2320

2421
struct commit;
2522
struct bloom_filter_settings;
23+
struct repository;
24+
struct raw_object_store;
25+
struct string_list;
2626

2727
char *get_commit_graph_filename(struct object_directory *odb);
2828
int open_commit_graph(const char *graph_file, int *fd, struct stat *st);

‎commit-slab-decl.h

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ struct slabname { \
3232
void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
3333
void init_ ##slabname(struct slabname *s); \
3434
void clear_ ##slabname(struct slabname *s); \
35+
void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
3536
elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
3637
elemtype *slabname## _at(struct slabname *s, const struct commit *c); \
3738
elemtype *slabname## _peek(struct slabname *s, const struct commit *c)

‎commit-slab-impl.h

+13
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \
3838
FREE_AND_NULL(s->slab); \
3939
} \
4040
\
41+
scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
42+
{ \
43+
unsigned int i; \
44+
for (i = 0; i < s->slab_count; i++) { \
45+
unsigned int j; \
46+
if (!s->slab[i]) \
47+
continue; \
48+
for (j = 0; j < s->slab_size; j++) \
49+
free_fn(&s->slab[i][j * s->stride]); \
50+
} \
51+
clear_ ##slabname(s); \
52+
} \
53+
\
4154
scope elemtype *slabname## _at_peek(struct slabname *s, \
4255
const struct commit *c, \
4356
int add_if_missing) \

‎commit-slab.h

+10
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,16 @@
4747
*
4848
* Call this function before the slab falls out of scope to avoid
4949
* leaking memory.
50+
*
51+
* - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
52+
*
53+
* Empties the slab, similar to clear_indegree(), but in addition it
54+
* calls the given 'free_fn' for each slab entry to release any
55+
* additional memory that might be owned by the entry (but not the
56+
* entry itself!).
57+
* Note that 'free_fn' might be called even for entries for which no
58+
* indegree_at() call has been made; in this case 'free_fn' is invoked
59+
* with a pointer to a zero-initialized location.
5060
*/
5161

5262
#define define_commit_slab(slabname, elemtype) \

‎diff.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -431,11 +431,11 @@ struct combine_diff_path *diff_tree_paths(
431431
struct combine_diff_path *p, const struct object_id *oid,
432432
const struct object_id **parents_oid, int nparent,
433433
struct strbuf *base, struct diff_options *opt);
434-
int diff_tree_oid(const struct object_id *old_oid,
435-
const struct object_id *new_oid,
436-
const char *base, struct diff_options *opt);
437-
int diff_root_tree_oid(const struct object_id *new_oid, const char *base,
438-
struct diff_options *opt);
434+
void diff_tree_oid(const struct object_id *old_oid,
435+
const struct object_id *new_oid,
436+
const char *base, struct diff_options *opt);
437+
void diff_root_tree_oid(const struct object_id *new_oid, const char *base,
438+
struct diff_options *opt);
439439

440440
struct combine_diff_path {
441441
struct combine_diff_path *next;

‎revision.c

+3-6
Original file line numberDiff line numberDiff line change
@@ -791,9 +791,7 @@ static int rev_compare_tree(struct rev_info *revs,
791791

792792
tree_difference = REV_TREE_SAME;
793793
revs->pruning.flags.has_changes = 0;
794-
if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "",
795-
&revs->pruning) < 0)
796-
return REV_TREE_DIFFERENT;
794+
diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning);
797795

798796
if (!nth_parent)
799797
if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
@@ -804,17 +802,16 @@ static int rev_compare_tree(struct rev_info *revs,
804802

805803
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
806804
{
807-
int retval;
808805
struct tree *t1 = get_commit_tree(commit);
809806

810807
if (!t1)
811808
return 0;
812809

813810
tree_difference = REV_TREE_SAME;
814811
revs->pruning.flags.has_changes = 0;
815-
retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
812+
diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
816813

817-
return retval >= 0 && (tree_difference == REV_TREE_SAME);
814+
return tree_difference == REV_TREE_SAME;
818815
}
819816

820817
struct treesame_state {

‎shallow.c

+5-9
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ void rollback_shallow_file(struct repository *r, struct shallow_lock *lk)
110110
* supports a "valid" flag.
111111
*/
112112
define_commit_slab(commit_depth, int *);
113+
static void free_depth_in_slab(int **ptr)
114+
{
115+
FREE_AND_NULL(*ptr);
116+
}
113117
struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
114118
int shallow_flag, int not_shallow_flag)
115119
{
@@ -176,15 +180,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
176180
}
177181
}
178182
}
179-
for (i = 0; i < depths.slab_count; i++) {
180-
int j;
181-
182-
if (!depths.slab[i])
183-
continue;
184-
for (j = 0; j < depths.slab_size; j++)
185-
free(depths.slab[i][j]);
186-
}
187-
clear_commit_depth(&depths);
183+
deep_clear_commit_depth(&depths, free_depth_in_slab);
188184

189185
return result;
190186
}

‎t/t4010-diff-pathspec.sh

+3-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,9 @@ test_expect_success 'setup submodules' '
125125
test_expect_success 'diff-tree ignores trailing slash on submodule path' '
126126
git diff --name-only HEAD^ HEAD submod >expect &&
127127
git diff --name-only HEAD^ HEAD submod/ >actual &&
128-
test_cmp expect actual
128+
test_cmp expect actual &&
129+
git diff --name-only HEAD^ HEAD -- submod/whatever >actual &&
130+
test_must_be_empty actual
129131
'
130132

131133
test_expect_success 'diff multiple wildcard pathspecs' '

‎t/t5318-commit-graph.sh

+3-2
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ test_expect_success 'detect bad hash version' '
529529
'
530530

531531
test_expect_success 'detect low chunk count' '
532-
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \
532+
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
533533
"missing the .* chunk"
534534
'
535535

@@ -615,7 +615,8 @@ test_expect_success 'detect invalid checksum hash' '
615615

616616
test_expect_success 'detect incorrect chunk count' '
617617
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \
618-
"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET
618+
"commit-graph file is too small to hold [0-9]* chunks" \
619+
$GRAPH_CHUNK_LOOKUP_OFFSET
619620
'
620621

621622
test_expect_success 'git fsck (checks commit-graph)' '

0 commit comments

Comments
 (0)