Skip to content

Commit

Permalink
Optimize memory and CPU for building new Bloom filter (facebook#6175)
Browse files Browse the repository at this point in the history
Summary:
The filter bits builder collects all the hashes to add in memory before adding them (because the number of keys is not known until we've walked over all the keys). Existing code uses a std::vector for this, which can mean up to 2x more space allocated than necessary (and not freed) and up to ~2x write amplification in memory. Using std::deque uses close to minimal space (for large filters, the only time it matters), incurs no write amplification, frees memory while building, and does not need a large contiguous memory area. The only cost is more calls to the allocator, which does not appear to matter, at least in the benchmark test.

For now, this change only applies to the new (format_version=5) Bloom filter implementation, to ease before-and-after comparison downstream.

Temporary memory use during build is about the only way the new Bloom filter could regress vs. the old (because of upgrade to 64-bit hash) and that should only matter for full filters. This change should largely mitigate that potential regression.
Pull Request resolved: facebook#6175

Test Plan:
Using filter_bench with the -new_builder option and 6M keys per filter approximates a large full filter (improvement). Using 10k keys per filter without -new_builder approximates partitioned filters (about the same). (Corresponding configurations were run simultaneously on a devserver.)

std::vector impl (before)

    $ /usr/bin/time -v ./filter_bench -impl=2 -quick -new_builder -working_mem_size_mb=1000 -average_keys_per_filter=6000000
    Build avg ns/key: 52.2027
    Maximum resident set size (kbytes): 1105016
    $ /usr/bin/time -v ./filter_bench -impl=2 -quick -working_mem_size_mb=1000 -average_keys_per_filter=10000
    Build avg ns/key: 30.5694
    Maximum resident set size (kbytes): 1208152

std::deque impl (after)

    $ /usr/bin/time -v ./filter_bench -impl=2 -quick -new_builder -working_mem_size_mb=1000 -average_keys_per_filter=6000000
    Build avg ns/key: 39.0697
    Maximum resident set size (kbytes): 1087196
    $ /usr/bin/time -v ./filter_bench -impl=2 -quick -working_mem_size_mb=1000 -average_keys_per_filter=10000
    Build avg ns/key: 30.9348
    Maximum resident set size (kbytes): 1207980

Differential Revision: D19053431

Pulled By: pdillinger

fbshipit-source-id: 2888e748723a19d9ea40403934f13cbb8483430c
  • Loading branch information
pdillinger authored and facebook-github-bot committed Dec 16, 2019
1 parent ad34fab commit a92bd0a
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 11 deletions.
17 changes: 11 additions & 6 deletions table/block_based/filter_policy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <array>
#include <deque>

#include "rocksdb/filter_policy.h"

Expand Down Expand Up @@ -41,7 +42,7 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {

virtual void AddKey(const Slice& key) override {
uint64_t hash = GetSliceHash64(key);
if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
if (hash_entries_.empty() || hash != hash_entries_.back()) {
hash_entries_.push_back(hash);
}
}
Expand Down Expand Up @@ -71,7 +72,7 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {

const char* const_data = data;
buf->reset(const_data);
hash_entries_.clear();
assert(hash_entries_.empty());

return Slice(data, len_with_metadata);
}
Expand All @@ -92,7 +93,7 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {
}

private:
void AddAllEntries(char* data, uint32_t len) const {
void AddAllEntries(char* data, uint32_t len) {
// Simple version without prefetching:
//
// for (auto h : hash_entries_) {
Expand All @@ -111,7 +112,8 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {
// Prime the buffer
size_t i = 0;
for (; i <= kBufferMask && i < num_entries; ++i) {
uint64_t h = hash_entries_[i];
uint64_t h = hash_entries_.front();
hash_entries_.pop_front();
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data,
/*out*/ &byte_offsets[i]);
hashes[i] = Upper32of64(h);
Expand All @@ -125,7 +127,8 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {
FastLocalBloomImpl::AddHashPrepared(hash_ref, num_probes_,
data + byte_offset_ref);
// And buffer
uint64_t h = hash_entries_[i];
uint64_t h = hash_entries_.front();
hash_entries_.pop_front();
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data,
/*out*/ &byte_offset_ref);
hash_ref = Upper32of64(h);
Expand All @@ -140,7 +143,9 @@ class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {

int millibits_per_key_;
int num_probes_;
std::vector<uint64_t> hash_entries_;
// A deque avoids unnecessary copying of already-saved values
// and has near-minimal peak memory use.
std::deque<uint64_t> hash_entries_;
};

// See description in FastLocalBloomImpl
Expand Down
16 changes: 11 additions & 5 deletions util/filter_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ DEFINE_bool(use_plain_table_bloom, false,
"Use PlainTableBloom structure and interface rather than "
"FilterBitsReader/FullFilterBlockReader");

DEFINE_bool(new_builder, false,
"Whether to create a new builder for each new filter");

DEFINE_uint32(impl, 0,
"Select filter implementation. Without -use_plain_table_bloom:"
"0 = full filter, 1 = block-based filter. With "
Expand Down Expand Up @@ -278,11 +281,6 @@ void FilterBench::Go() {
}
}

std::unique_ptr<FilterBitsBuilder> builder;
if (!FLAGS_use_plain_table_bloom && FLAGS_impl != 1) {
builder.reset(GetBuilder());
}

uint32_t variance_mask = 1;
while (variance_mask * variance_mask * 4 < FLAGS_average_keys_per_filter) {
variance_mask = variance_mask * 2 + 1;
Expand All @@ -300,6 +298,8 @@ void FilterBench::Go() {

std::cout << "Building..." << std::endl;

std::unique_ptr<FilterBitsBuilder> builder;

size_t total_memory_used = 0;
size_t total_keys_added = 0;

Expand All @@ -325,10 +325,16 @@ void FilterBench::Go() {
}
info.filter_ = info.plain_table_bloom_->GetRawData();
} else {
if (!builder) {
builder.reset(GetBuilder());
}
for (uint32_t i = 0; i < keys_to_add; ++i) {
builder->AddKey(kms_[0].Get(filter_id, i));
}
info.filter_ = builder->Finish(&info.owner_);
if (FLAGS_new_builder) {
builder.reset();
}
info.reader_.reset(
table_options_.filter_policy->GetFilterBitsReader(info.filter_));
CachableEntry<ParsedFullFilterBlock> block(
Expand Down

0 comments on commit a92bd0a

Please sign in to comment.