Skip to content

Commit

Permalink
Improving condition for bottommost level during compaction
Browse files Browse the repository at this point in the history
Summary: This diff changes the condition used to determine the bottommost level during compaction. Previously, the absence of files in higher levels was the only condition checked. Now the function additionally checks whether the files in higher levels have key ranges that overlap with the files being compacted; if none of them overlap, the output level can safely be considered the bottommost level.

Test Plan: Unit test cases added and passing. However, unit tests of universal compaction are failing as a result of the changes made in this diff. Need to understand why that is happening.

Reviewers: igor

Subscribers: dhruba, sdong, lgalanis, meyering

Differential Revision: https://reviews.facebook.net/D46473
  • Loading branch information
Mayank Pundir committed Sep 17, 2015
1 parent 9aca7cd commit a5e312a
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 3 deletions.
51 changes: 49 additions & 2 deletions db/compaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,28 @@ void Compaction::SetInputVersion(Version* _input_version) {
edit_.SetColumnFamily(cfd_->GetID());
}

// Computes the user-key range covered by all files in `inputs`.
//
// On return, *smallest_user_key / *largest_user_key hold the minimum start key
// and the maximum end key (according to the column family's user comparator)
// over every non-empty input level. If every input level is empty, neither
// output is written.
//
// Level 0 is handled separately: its files may overlap one another and are
// not ordered by key, so the first/last file of the level does not bound the
// level's key range and every file has to be inspected. For the other levels
// the first file's smallest key and the last file's largest key are used.
void Compaction::GetBoundaryKeys(
    VersionStorageInfo* vstorage,
    const std::vector<CompactionInputFiles>& inputs, Slice* smallest_user_key,
    Slice* largest_user_key) {
  bool initialized = false;
  const Comparator* ucmp = vstorage->InternalComparator()->user_comparator();

  // Widens the running [smallest, largest] range so that it also covers
  // [start_user_key, end_user_key].
  auto extend_range = [&](const Slice& start_user_key,
                          const Slice& end_user_key) {
    if (!initialized ||
        ucmp->Compare(start_user_key, *smallest_user_key) < 0) {
      *smallest_user_key = start_user_key;
    }
    if (!initialized || ucmp->Compare(end_user_key, *largest_user_key) > 0) {
      *largest_user_key = end_user_key;
    }
    initialized = true;
  };

  for (size_t i = 0; i < inputs.size(); ++i) {
    const CompactionInputFiles& level_inputs = inputs[i];
    if (level_inputs.files.empty()) {
      continue;
    }
    if (level_inputs.level == 0) {
      // Level-0 files are unordered and may overlap: consider all of them.
      for (const FileMetaData* f : level_inputs.files) {
        extend_range(f->smallest.user_key(), f->largest.user_key());
      }
    } else {
      // The first and last file bound the level's key range.
      extend_range(level_inputs.files.front()->smallest.user_key(),
                   level_inputs.files.back()->largest.user_key());
    }
  }
}

// helper function to determine if compaction is creating files at the
// bottommost level
bool Compaction::IsBottommostLevel(
Expand All @@ -50,15 +72,40 @@ bool Compaction::IsBottommostLevel(
return false;
}

// checks whether there are files living beyond the output_level.
Slice smallest_key, largest_key;
GetBoundaryKeys(vstorage, inputs, &smallest_key, &largest_key);

// Checks whether there are files living beyond the output_level.
// If lower levels have files, it checks for overlap between files
// of the compaction process and those files.
// Bottomlevel optimizations can be made if there are no files in
// lower levels or if there is no overlap with the files in
// the lower levels.
for (int i = output_level + 1; i < vstorage->num_levels(); i++) {
if (vstorage->NumLevelFiles(i) > 0) {
// It is not the bottommost level if there are files in higher
// levels when the output level is 0 or if there are files in
// higher levels which overlap with files to be compacted.
// output_level == 0 means that we want it to be considered
// as the bottommost level only if the last file on the level
// is a part of the files to be compacted - this is verified by
// the first if condition in this function
if (vstorage->NumLevelFiles(i) > 0 &&
(output_level == 0 ||
vstorage->OverlapInLevel(i, &smallest_key, &largest_key))) {
return false;
}
}
return true;
}

// Test-only entry point: exposes the private IsBottommostLevel() helper so
// unit tests can ask whether a compaction of `inputs` into `output_level`,
// given the files in `vstorage`, is treated as a bottommost-level compaction.
bool Compaction::TEST_IsBottommostLevel(
    int output_level, VersionStorageInfo* vstorage,
    const std::vector<CompactionInputFiles>& inputs) {
  const bool is_bottommost =
      Compaction::IsBottommostLevel(output_level, vstorage, inputs);
  return is_bottommost;
}

bool Compaction::IsFullCompaction(
VersionStorageInfo* vstorage,
const std::vector<CompactionInputFiles>& inputs) {
Expand Down
12 changes: 12 additions & 0 deletions db/compaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,15 +204,27 @@ class Compaction {
// Should this compaction be broken up into smaller ones run in parallel?
bool ShouldFormSubcompactions() const;

// test function to validate the functionality of IsBottommostLevel()
// function -- determines if compaction with inputs and storage is bottommost
static bool TEST_IsBottommostLevel(
int output_level, VersionStorageInfo* vstorage,
const std::vector<CompactionInputFiles>& inputs);

private:
// mark (or clear) all files that are being compacted
void MarkFilesBeingCompacted(bool mark_as_compacted);

// get the smallest and largest key present in files to be compacted
static void GetBoundaryKeys(VersionStorageInfo* vstorage,
const std::vector<CompactionInputFiles>& inputs,
Slice* smallest_key, Slice* largest_key);

// helper function to determine if compaction with inputs and storage is
// bottommost
static bool IsBottommostLevel(
int output_level, VersionStorageInfo* vstorage,
const std::vector<CompactionInputFiles>& inputs);

static bool IsFullCompaction(VersionStorageInfo* vstorage,
const std::vector<CompactionInputFiles>& inputs);

Expand Down
8 changes: 7 additions & 1 deletion db/compaction_picker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1322,7 +1322,13 @@ Compaction* UniversalCompactionPicker::PickCompaction(
&largest_seqno);
if (is_first) {
is_first = false;
} else {
} else if (prev_smallest_seqno > 0) {
// A level is considered as the bottommost level if there are
// no files in higher levels or if files in higher levels do
// not overlap with the files being compacted. Sequence numbers
// of files in bottommost level can be set to 0 to help
// compression. As a result, the following assert may not hold
// if the prev_smallest_seqno is 0.
assert(prev_smallest_seqno > largest_seqno);
}
prev_smallest_seqno = smallest_seqno;
Expand Down
135 changes: 135 additions & 0 deletions db/compaction_picker_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#include "db/compaction.h"
#include "db/compaction_picker.h"
#include <limits>
#include <string>
Expand Down Expand Up @@ -35,6 +36,9 @@ class CompactionPickerTest : public testing::Test {
CompactionOptionsFIFO fifo_options_;
std::unique_ptr<VersionStorageInfo> vstorage_;
std::vector<std::unique_ptr<FileMetaData>> files_;
// input files to compaction process.
std::vector<CompactionInputFiles> input_files_;
int compaction_level_start_;

CompactionPickerTest()
: ucmp_(BytewiseComparator()),
Expand Down Expand Up @@ -66,6 +70,7 @@ class CompactionPickerTest : public testing::Test {
// Discards the fixture state built up for one scenario: releases the current
// version storage, drops the file metadata owned by files_, and empties the
// list of compaction input files.
void DeleteVersionStorage() {
// Statement order is kept as-is.
// NOTE(review): vstorage_ presumably refers to entries owned by files_, so it
// is released first -- confirm before reordering.
vstorage_.reset();
files_.clear();
// Only the per-level containers are cleared here; the FileMetaData pointers
// they hold are raw pointers and are not deleted by this call.
input_files_.clear();
}

void Add(int level, uint32_t file_number, const char* smallest,
Expand All @@ -85,6 +90,31 @@ class CompactionPickerTest : public testing::Test {
files_.emplace_back(f);
}

void setCompactionInputFilesLevels(int level_count, int start_level) {
input_files_.resize(level_count);
for (int i = 0; i < level_count; ++i) {
input_files_[i].level = start_level + i;
}
compaction_level_start_ = start_level;
}

void AddToCompactionFiles(int level, uint32_t file_number,
const char* smallest, const char* largest,
uint64_t file_size = 0, uint32_t path_id = 0,
SequenceNumber smallest_seq = 100,
SequenceNumber largest_seq = 100) {
assert(level < vstorage_->num_levels());
FileMetaData* f = new FileMetaData;
f->fd = FileDescriptor(file_number, path_id, file_size);
f->smallest = InternalKey(smallest, smallest_seq, kTypeValue);
f->largest = InternalKey(largest, largest_seq, kTypeValue);
f->smallest_seqno = smallest_seq;
f->largest_seqno = largest_seq;
f->compensated_file_size = file_size;
f->refs = 0;
input_files_[level - compaction_level_start_].files.emplace_back(f);
}

void UpdateVersionStorageInfo() {
vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_);
vstorage_->UpdateFilesBySize();
Expand Down Expand Up @@ -637,6 +667,111 @@ TEST_F(CompactionPickerTest, EstimateCompactionBytesNeededDynamicLevel) {
vstorage_->estimated_compaction_needed_bytes());
}

// Exercises Compaction::TEST_IsBottommostLevel() with output level 2 and
// compaction inputs taken from levels 1 and 2. Each case rebuilds a 6-level
// version storage and varies the files placed on levels 3-5 (the levels below
// the output level).
TEST_F(CompactionPickerTest, IsBottommostLevelTest) {
// case 1: Higher levels are empty
// Only levels 0-2 contain files, so nothing lives beyond the output level.
NewVersionStorage(6, kCompactionStyleLevel);
Add(0, 1U, "a", "c");
Add(0, 2U, "y", "z");
Add(1, 3U, "d", "e");
Add(1, 4U, "l", "p");
Add(2, 5U, "g", "i");
Add(2, 6U, "x", "z");
UpdateVersionStorageInfo();
setCompactionInputFilesLevels(2, 1);
AddToCompactionFiles(1, 3U, "d", "e");
AddToCompactionFiles(2, 5U, "g", "i");
bool result =
Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
ASSERT_TRUE(result);

// case 2: Higher levels have no overlap
// Levels 3-5 hold [k,p], [t,w], [a,b] and [c,cc]; none of them intersects
// the input key range [d,i].
DeleteVersionStorage();
NewVersionStorage(6, kCompactionStyleLevel);
Add(0, 1U, "a", "c");
Add(0, 2U, "y", "z");
Add(1, 3U, "d", "e");
Add(1, 4U, "l", "p");
Add(2, 5U, "g", "i");
Add(2, 6U, "x", "z");
Add(3, 7U, "k", "p");
Add(3, 8U, "t", "w");
Add(4, 9U, "a", "b");
Add(5, 10U, "c", "cc");
UpdateVersionStorageInfo();
setCompactionInputFilesLevels(2, 1);
AddToCompactionFiles(1, 3U, "d", "e");
AddToCompactionFiles(2, 5U, "g", "i");
result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
ASSERT_TRUE(result);

// case 3.1: Higher levels (level 3) have overlap
// Level 3 holds [e,g] and [h,k], both intersecting the input range [d,i].
DeleteVersionStorage();
NewVersionStorage(6, kCompactionStyleLevel);
Add(0, 1U, "a", "c");
Add(0, 2U, "y", "z");
Add(1, 3U, "d", "e");
Add(1, 4U, "l", "p");
Add(2, 5U, "g", "i");
Add(2, 6U, "x", "z");
Add(3, 7U, "e", "g");
Add(3, 8U, "h", "k");
Add(4, 9U, "a", "b");
Add(5, 10U, "c", "cc");
UpdateVersionStorageInfo();
setCompactionInputFilesLevels(2, 1);
AddToCompactionFiles(1, 3U, "d", "e");
AddToCompactionFiles(2, 5U, "g", "i");
result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
ASSERT_FALSE(result);

// case 3.2: Higher levels (level 5) have overlap
// Levels 3 and 4 do not intersect the input range [d,i], but level 5 holds
// [h,k], which does.
DeleteVersionStorage();
NewVersionStorage(6, kCompactionStyleLevel);
Add(0, 1U, "a", "c");
Add(0, 2U, "y", "z");
Add(1, 3U, "d", "e");
Add(1, 4U, "l", "p");
Add(2, 5U, "g", "i");
Add(2, 6U, "x", "z");
Add(3, 7U, "j", "k");
Add(3, 8U, "l", "m");
Add(4, 9U, "a", "b");
Add(5, 10U, "c", "cc");
Add(5, 11U, "h", "k");
Add(5, 12U, "y", "yy");
Add(5, 13U, "z", "zz");
UpdateVersionStorageInfo();
setCompactionInputFilesLevels(2, 1);
AddToCompactionFiles(1, 3U, "d", "i");
AddToCompactionFiles(2, 5U, "g", "i");
result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
ASSERT_FALSE(result);

// case 3.3: Higher levels (level 5) overlap only at the range boundary
// Level 5 holds [ccc,d], whose largest key equals the smallest input key "d".
DeleteVersionStorage();
NewVersionStorage(6, kCompactionStyleLevel);
Add(0, 1U, "a", "c");
Add(0, 2U, "y", "z");
Add(1, 3U, "d", "e");
Add(1, 4U, "l", "p");
Add(2, 5U, "g", "i");
Add(2, 6U, "x", "z");
Add(3, 7U, "j", "k");
Add(3, 8U, "l", "m");
Add(4, 9U, "a", "b");
Add(5, 10U, "c", "cc");
Add(5, 11U, "ccc", "d");
Add(5, 12U, "y", "yy");
Add(5, 13U, "z", "zz");
UpdateVersionStorageInfo();
setCompactionInputFilesLevels(2, 1);
AddToCompactionFiles(1, 3U, "d", "i");
AddToCompactionFiles(2, 5U, "g", "i");
result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
ASSERT_FALSE(result);
DeleteVersionStorage();
}

} // namespace rocksdb

int main(int argc, char** argv) {
Expand Down

0 comments on commit a5e312a

Please sign in to comment.