Skip to content

Commit

Permalink
DataBlockHashIndex: Remove the division from EstimateSize() (facebook…
Browse files Browse the repository at this point in the history
…#4293)

Summary:
`BlockBasedTableBuilder::Add()` eventually calls
`DataBlockHashIndexBuilder::EstimateSize()`. The previous implementation
divides the `num_keys` by the `util_ratio_` to get an estimizted `num_buckets`.
Such division is expensive as it happens in every
`BlockBasedTableBuilder::Add()`.

This diff estimates the `num_buckets` by double addition instead of double
division. Specifically, in each `Add()`, we add `bucket_per_key_`
(inverse of `util_ratio_`) to the current `estimiated_num_buckets_`. The cost is
that we are gonna have the `estimated_num_buckets_` stored as one extra field
in the DataBlockHashIndexBuilder.
Pull Request resolved: facebook#4293

Differential Revision: D9412472

Pulled By: fgwu

fbshipit-source-id: 2925c8509a401e7bd3c1ab1d9e9c7244755c277a
  • Loading branch information
fgwu authored and facebook-github-bot committed Aug 21, 2018
1 parent 7188bd3 commit 6d37fdb
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 10 deletions.
1 change: 0 additions & 1 deletion table/block_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ size_t BlockBuilder::EstimateSizeAfterKV(const Slice& key, const Slice& value)
estimate += VarintLength(value.size()); // varint for value length.
}

// TODO(fwu): add the delta of the DataBlockHashIndex
return estimate;
}

Expand Down
8 changes: 5 additions & 3 deletions table/data_block_hash_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ void DataBlockHashIndexBuilder::Add(const Slice& key,
uint32_t hash_value = GetSliceHash(key);
hash_and_restart_pairs_.emplace_back(hash_value,
static_cast<uint8_t>(restart_index));
estimated_num_buckets_ += bucket_per_key_;
}

void DataBlockHashIndexBuilder::Finish(std::string& buffer) {
assert(Valid());
uint16_t num_buckets = static_cast<uint16_t>(
static_cast<double>(hash_and_restart_pairs_.size()) / util_ratio_);
uint16_t num_buckets = static_cast<uint16_t>(estimated_num_buckets_);

if (num_buckets == 0) {
num_buckets = 1; // sanity check
}
Expand Down Expand Up @@ -66,8 +67,9 @@ void DataBlockHashIndexBuilder::Finish(std::string& buffer) {
}

void DataBlockHashIndexBuilder::Reset() {
hash_and_restart_pairs_.clear();
estimated_num_buckets_ = 0;
valid_ = true;
hash_and_restart_pairs_.clear();
}

void DataBlockHashIndex::Initialize(const char* data, uint16_t size,
Expand Down
16 changes: 10 additions & 6 deletions table/data_block_hash_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,26 @@ const double kDefaultUtilRatio = 0.75;

class DataBlockHashIndexBuilder {
public:
DataBlockHashIndexBuilder() : util_ratio_(0), valid_(false) {}
DataBlockHashIndexBuilder()
: bucket_per_key_(-1 /*uninitialized marker*/),
estimated_num_buckets_(0),
valid_(false) {}

void Initialize(double util_ratio) {
if (util_ratio <= 0) {
util_ratio = kDefaultUtilRatio; // sanity check
}
util_ratio_ = util_ratio;
bucket_per_key_ = 1 / util_ratio;
valid_ = true;
}

inline bool Valid() const { return valid_ && util_ratio_ > 0; }
inline bool Valid() const { return valid_ && bucket_per_key_ > 0; }
void Add(const Slice& key, const size_t restart_index);
void Finish(std::string& buffer);
void Reset();
inline size_t EstimateSize() const {
uint16_t estimated_num_buckets = static_cast<uint16_t>(
static_cast<double>(hash_and_restart_pairs_.size()) / util_ratio_);
uint16_t estimated_num_buckets =
static_cast<uint16_t>(estimated_num_buckets_);

// Maching the num_buckets number in DataBlockHashIndexBuilder::Finish.
estimated_num_buckets |= 1;
Expand All @@ -98,7 +101,8 @@ class DataBlockHashIndexBuilder {
}

private:
double util_ratio_;
double bucket_per_key_; // is the multiplicative inverse of util_ratio_
double estimated_num_buckets_;

// Now the only usage for `valid_` is to mark false when the inserted
// restart_index is larger than supported. In this case HashIndex is not
Expand Down

0 comments on commit 6d37fdb

Please sign in to comment.