Skip to content

Commit

Permalink
Improve data block construction performance (facebook#9040)
Browse files Browse the repository at this point in the history
Summary:
... by bypassing tracking of last_key in BlockBuilder when
last_key is already known (for BlockBasedTableBuilder::data_block).

I tried extracting a base class of BlockBuilder without the last_key
tracking at all, but that became complicated by NewFlushBlockPolicy() in
the public API referencing BlockBuilder, which would need to be the base
class, and I don't want to replace nearly all the internal references to
BlockBuilder.

Possible follow-up:
* Investigate / consider using AddWithLastKey in more places

This improvement should stack with facebook#9039

Pull Request resolved: facebook#9040

Test Plan:
TEST_TMPDIR=/dev/shm/rocksdb1 ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num=50000000
Compiled with DEBUG_LEVEL=0
Test vs. control runs simulaneous for better accuracy, units = ops/sec

Run 1: 278929 vs. 267799 (+4.2%)
Run 2: 281836 vs. 267432 (+5.4%)
Run 3: 278279 vs. 270454 (+2.9%)

(This benchmark is chosen to have detectable signal-to-noise, not to
represent expected improvement percent on real workloads.)

Reviewed By: mrambacher

Differential Revision: D31706033

Pulled By: pdillinger

fbshipit-source-id: 8a50fe6fefdd67b6d7665ffa687bbdcf5ad0d5ec
  • Loading branch information
pdillinger authored and facebook-github-bot committed Oct 19, 2021
1 parent 0534393 commit b234a3f
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 16 deletions.
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
* Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files.
* If `DB::Close()` failed with a non aborted status, calling `DB::Close()` again will return the original status instead of Status::OK.

### Performance Improvements
* Improved CPU efficiency of building block-based table (SST) files (#9039 and #9040).

## 6.25.0 (2021-09-20)
### Bug Fixes
* Allow secondary instance to refresh iterator. Assign read seq after referencing SuperVersion.
Expand Down
2 changes: 1 addition & 1 deletion table/block_based/block_based_table_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -961,8 +961,8 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
}
}

r->data_block.AddWithLastKey(key, value, r->last_key);
r->last_key.assign(key.data(), key.size());
r->data_block.Add(key, value);
if (r->state == Rep::State::kBuffered) {
// Buffered keys will be replayed from data_block_buffers during
// `Finish()` once compression dictionary has been finalized.
Expand Down
62 changes: 47 additions & 15 deletions table/block_based/block_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ void BlockBuilder::Reset() {
if (data_block_hash_index_builder_.Valid()) {
data_block_hash_index_builder_.Reset();
}
#ifndef NDEBUG
add_with_last_key_called_ = false;
#endif
}

void BlockBuilder::SwapAndReset(std::string& buffer) {
Expand Down Expand Up @@ -138,33 +141,62 @@ Slice BlockBuilder::Finish() {

void BlockBuilder::Add(const Slice& key, const Slice& value,
const Slice* const delta_value) {
// Ensure no unsafe mixing of Add and AddWithLastKey
assert(!add_with_last_key_called_);

AddWithLastKeyImpl(key, value, last_key_, delta_value, buffer_.size());
if (use_delta_encoding_) {
// Update state
// We used to just copy the changed data, but it appears to be
// faster to just copy the whole thing.
last_key_.assign(key.data(), key.size());
}
}

void BlockBuilder::AddWithLastKey(const Slice& key, const Slice& value,
const Slice& last_key_param,
const Slice* const delta_value) {
// Ensure no unsafe mixing of Add and AddWithLastKey
assert(last_key_.empty());
#ifndef NDEBUG
add_with_last_key_called_ = false;
#endif

// Here we make sure to use an empty `last_key` on first call after creation
// or Reset. This is more convenient for the caller and we can be more
// clever inside BlockBuilder. On this hot code path, we want to avoid
// conditional jumps like `buffer_.empty() ? ... : ...` so we can use a
// fast min operation instead, with an assertion to be sure our logic is
// sound.
size_t buffer_size = buffer_.size();
size_t last_key_size = last_key_param.size();
assert(buffer_size == 0 || buffer_size >= last_key_size);

Slice last_key(last_key_param.data(), std::min(buffer_size, last_key_size));

AddWithLastKeyImpl(key, value, last_key, delta_value, buffer_size);
}

inline void BlockBuilder::AddWithLastKeyImpl(const Slice& key,
const Slice& value,
const Slice& last_key,
const Slice* const delta_value,
size_t buffer_size) {
assert(!finished_);
assert(counter_ <= block_restart_interval_);
assert(!use_value_delta_encoding_ || delta_value);
size_t shared = 0; // number of bytes shared with prev key
if (counter_ >= block_restart_interval_) {
// Restart compression
restarts_.push_back(static_cast<uint32_t>(buffer_.size()));
restarts_.push_back(static_cast<uint32_t>(buffer_size));
estimate_ += sizeof(uint32_t);
counter_ = 0;

if (use_delta_encoding_) {
// Update state
last_key_.assign(key.data(), key.size());
}
} else if (use_delta_encoding_) {
Slice last_key_piece(last_key_);
// See how much sharing to do with previous string
shared = key.difference_offset(last_key_piece);

// Update state
// We used to just copy the changed data here, but it appears to be
// faster to just copy the whole thing.
last_key_.assign(key.data(), key.size());
shared = key.difference_offset(last_key);
}

const size_t non_shared = key.size() - shared;
const size_t curr_size = buffer_.size();

if (use_value_delta_encoding_) {
// Add "<shared><non_shared>" to buffer_
Expand Down Expand Up @@ -194,7 +226,7 @@ void BlockBuilder::Add(const Slice& key, const Slice& value,
}

counter_++;
estimate_ += buffer_.size() - curr_size;
estimate_ += buffer_.size() - buffer_size;
}

} // namespace ROCKSDB_NAMESPACE
23 changes: 23 additions & 0 deletions table/block_based/block_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,24 @@ class BlockBuilder {

// REQUIRES: Finish() has not been called since the last call to Reset().
// REQUIRES: key is larger than any previously added key
// DO NOT mix with AddWithLastKey() between Resets. For efficiency, use
// AddWithLastKey() in contexts where previous added key is already known
// and delta encoding might be used.
void Add(const Slice& key, const Slice& value,
const Slice* const delta_value = nullptr);

// A faster version of Add() if the previous key is already known for all
// Add()s.
// REQUIRES: Finish() has not been called since the last call to Reset().
// REQUIRES: key is larger than any previously added key
// REQUIRES: if AddWithLastKey has been called since last Reset(), last_key
// is the key from most recent AddWithLastKey. (For convenience, last_key
// is ignored on first call after creation or Reset().)
// DO NOT mix with Add() between Resets.
void AddWithLastKey(const Slice& key, const Slice& value,
const Slice& last_key,
const Slice* const delta_value = nullptr);

// Finish building the block and return a slice that refers to the
// block contents. The returned slice will remain valid for the
// lifetime of this builder or until Reset() is called.
Expand All @@ -60,6 +75,11 @@ class BlockBuilder {
bool empty() const { return buffer_.empty(); }

private:
inline void AddWithLastKeyImpl(const Slice& key, const Slice& value,
const Slice& last_key,
const Slice* const delta_value,
size_t buffer_size);

const int block_restart_interval_;
// TODO(myabandeh): put it into a separate IndexBlockBuilder
const bool use_delta_encoding_;
Expand All @@ -73,6 +93,9 @@ class BlockBuilder {
bool finished_; // Has Finish() been called?
std::string last_key_;
DataBlockHashIndexBuilder data_block_hash_index_builder_;
#ifndef NDEBUG
bool add_with_last_key_called_ = false;
#endif
};

} // namespace ROCKSDB_NAMESPACE

0 comments on commit b234a3f

Please sign in to comment.