Skip to content

Commit

Permalink
commit-graph: reuse existing Bloom filters during write
Browse files Browse the repository at this point in the history
Add logic to
a) parse Bloom filter information from the commit graph file and,
b) re-use existing Bloom filters.

See Documentation/technical/commit-graph-format for the format in which
the Bloom filter information is written to the commit graph file.

To read Bloom filter for a given commit with lexicographic position
'i' we need to:
1. Read BIDX[i] which essentially gives us the starting index in BDAT for
   filter of commit i+1. It is essentially the index past the end
   of the filter of commit i. It is called end_index in the code.

2. For i>0, read BIDX[i-1] which will give us the starting index in BDAT
   for filter of commit i. It is called the start_index in the code.
   For the first commit, where i = 0, Bloom filter data starts at the
   beginning, just past the header in the BDAT chunk. Hence, start_index
   will be 0.

3. The length of the filter will be end_index - start_index, because
   BIDX[i] gives the cumulative 8-byte words including the ith
   commit's filter.

We toggle whether Bloom filters should be recomputed based on the
compute_if_not_present flag.

Helped-by: Derrick Stolee <[email protected]>
Signed-off-by: Garima Singh <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
garimasi514 authored and gitster committed Apr 6, 2020
1 parent 76ffbca commit 1217c03
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 6 deletions.
49 changes: 48 additions & 1 deletion bloom.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "diffcore.h"
#include "revision.h"
#include "hashmap.h"
#include "commit-graph.h"
#include "commit.h"

define_commit_slab(bloom_filter_slab, struct bloom_filter);

Expand All @@ -26,6 +28,36 @@ static inline unsigned char get_bitmask(uint32_t pos)
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
}

static int load_bloom_filter_from_graph(struct commit_graph *g,
struct bloom_filter *filter,
struct commit *c)
{
uint32_t lex_pos, start_index, end_index;

while (c->graph_pos < g->num_commits_in_base)
g = g->base_graph;

/* The commit graph commit 'c' lives in doesn't carry bloom filters. */
if (!g->chunk_bloom_indexes)
return 0;

lex_pos = c->graph_pos - g->num_commits_in_base;

end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos);

if (lex_pos > 0)
start_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1));
else
start_index = 0;

filter->len = end_index - start_index;
filter->data = (unsigned char *)(g->chunk_bloom_data +
sizeof(unsigned char) * start_index +
BLOOMDATA_CHUNK_HEADER_SIZE);

return 1;
}

/*
* Calculate the murmur3 32-bit hash value for the given data
* using the given seed.
Expand Down Expand Up @@ -127,7 +159,8 @@ void init_bloom_filters(void)
}

struct bloom_filter *get_bloom_filter(struct repository *r,
struct commit *c)
struct commit *c,
int compute_if_not_present)
{
struct bloom_filter *filter;
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
Expand All @@ -140,6 +173,20 @@ struct bloom_filter *get_bloom_filter(struct repository *r,

filter = bloom_filter_slab_at(&bloom_filters, c);

if (!filter->data) {
load_commit_graph_info(r, c);
if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
r->objects->commit_graph->chunk_bloom_indexes) {
if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
return filter;
else
return NULL;
}
}

if (filter->data || !compute_if_not_present)
return filter;

repo_diff_setup(r, &diffopt);
diffopt.flags.recursive = 1;
diffopt.max_changes = max_changes;
Expand Down
4 changes: 3 additions & 1 deletion bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct bloom_filter_settings {

#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
#define BITS_PER_WORD 8
#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)

/*
* A bloom_filter struct represents a data segment to
Expand Down Expand Up @@ -79,6 +80,7 @@ void add_key_to_filter(const struct bloom_key *key,
void init_bloom_filters(void);

struct bloom_filter *get_bloom_filter(struct repository *r,
struct commit *c);
struct commit *c,
int compute_if_not_present);

#endif
6 changes: 3 additions & 3 deletions commit-graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
ctx->commits.nr);

while (list < last) {
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
cur_pos += filter->len;
display_progress(progress, ++i);
hashwrite_be32(f, cur_pos);
Expand Down Expand Up @@ -1115,7 +1115,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
hashwrite_be32(f, settings->bits_per_entry);

while (list < last) {
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
display_progress(progress, ++i);
hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
list++;
Expand Down Expand Up @@ -1296,7 +1296,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)

for (i = 0; i < ctx->commits.nr; i++) {
struct commit *c = sorted_commits[i];
struct bloom_filter *filter = get_bloom_filter(ctx->r, c);
struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
display_progress(progress, i + 1);
}
Expand Down
2 changes: 1 addition & 1 deletion t/helper/test-bloom.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
struct bloom_filter *filter;
setup_git_directory();
c = lookup_commit(the_repository, commit_oid);
filter = get_bloom_filter(the_repository, c);
filter = get_bloom_filter(the_repository, c, 1);
print_bloom_filter(filter);
}

Expand Down

0 comments on commit 1217c03

Please sign in to comment.