Properly determine a truncated CompactRange stop key (facebook#4496)
Summary:
When a CompactRange() call for a level is truncated before the end key
is reached because the compaction would exceed max_compaction_bytes, we need
to properly set the compaction_end parameter to indicate the stop key. The
next CompactRange call will use that as its begin key. We now set it to the
smallest key of the next file in the level, after expanding the inputs to get
a clean cut.

Previously, we set it before expanding the inputs, so we could end up
recompacting some files. In a pathological case where a single key has many
entries spanning all the files in the level (possibly due to merge operands
without a partial merge operator, so the compaction output is identical to
the input), this caused an endless loop over the same set of files.
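
To make the failure mode and the fix concrete, here is a minimal, self-contained sketch of the idea under a simplified file model (FileMeta and TruncateWithCleanCut are made-up names for illustration, not the actual RocksDB code): truncate the input list at the size limit, expand it to a clean cut, and only then derive the stop key from the first file left out.

```cpp
// Minimal sketch (assumed names, not RocksDB's implementation) of the fixed
// logic: truncate the file list at the size limit, expand to a clean cut,
// and only then take the stop key from the first file not included.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct FileMeta {
  std::string smallest;  // smallest user key in the file
  std::string largest;   // largest user key in the file
  uint64_t size;         // file size in bytes
};

// Returns the index one past the last selected file. Selection stops once the
// accumulated size reaches max_compaction_bytes, then grows until the boundary
// no longer splits a user key across two files (a "clean cut").
size_t TruncateWithCleanCut(const std::vector<FileMeta>& level_files,
                            uint64_t max_compaction_bytes) {
  uint64_t total = 0;
  size_t end = 0;
  while (end < level_files.size()) {
    total += level_files[end].size;
    ++end;
    if (total >= max_compaction_bytes) {
      break;
    }
  }
  // Clean-cut expansion: if the last selected file ends on the same user key
  // the next file starts with, the next file must be included as well.
  while (end > 0 && end < level_files.size() &&
         level_files[end - 1].largest == level_files[end].smallest) {
    ++end;
  }
  return end;
}

int main() {
  // Pathological shape from the bug report: every file holds entries for the
  // same user key "foo" (e.g. merge operands with no partial merge operator).
  std::vector<FileMeta> files = {
      {"foo", "foo", 10240}, {"foo", "foo", 10240}, {"foo", "foo", 10240}};
  size_t end = TruncateWithCleanCut(files, /*max_compaction_bytes=*/10240);
  if (end < files.size()) {
    // Stop key for this round; the next CompactRange resumes here.
    std::cout << "compaction_end = " << files[end].smallest << "\n";
  } else {
    // The clean cut covered the whole range, so there is nothing to resume.
    std::cout << "compaction_end = nullptr (whole range covered)\n";
  }
  return 0;
}
```

With the old order of operations, the stop key would have been the smallest key of the file right after the size cutoff, which in this example is the same user key "foo", so the next CompactRange would pick the very same files again. With the clean-cut expansion done first, the whole level is selected in one round and compaction_end is left null.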
Pull Request resolved: facebook#4496

Differential Revision: D10395026

Pulled By: anand1976

fbshipit-source-id: f0c2f89fee29b4b3be53b6467b53abba8e9146a9
anand1976 authored and facebook-github-bot committed Oct 16, 2018
1 parent e633983 commit 1e38458
Showing 6 changed files with 88 additions and 15 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
@@ -6,6 +6,7 @@
### Bug Fixes
* Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set.
* Fix in-memory range tombstone truncation to avoid erroneously covering newer keys at a lower level, and include range tombstones in compacted files whose largest key is the range tombstone's start key.
* Properly set the stop key for a truncated manual CompactRange
* Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds.

## 5.17.0 (10/05/2018)
16 changes: 11 additions & 5 deletions db/compaction_picker.cc
@@ -219,7 +219,8 @@ void CompactionPicker::GetRange(const std::vector<CompactionInputFiles>& inputs,

bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
VersionStorageInfo* vstorage,
CompactionInputFiles* inputs) {
CompactionInputFiles* inputs,
InternalKey** next_smallest) {
// This isn't good compaction
assert(!inputs->empty());

@@ -242,7 +243,8 @@ bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
GetRange(*inputs, &smallest, &largest);
inputs->clear();
vstorage->GetOverlappingInputs(level, &smallest, &largest, &inputs->files,
hint_index, &hint_index);
hint_index, &hint_index, true,
next_smallest);
} while (inputs->size() > old_size);

// we started off with inputs non-empty and the previous loop only grew
@@ -649,7 +651,6 @@ Compaction* CompactionPicker::CompactRange(
uint64_t s = inputs[i]->compensated_file_size;
total += s;
if (total >= limit) {
**compaction_end = inputs[i + 1]->smallest;
covering_the_whole_range = false;
inputs.files.resize(i + 1);
break;
@@ -658,16 +659,21 @@
}
assert(output_path_id < static_cast<uint32_t>(ioptions_.cf_paths.size()));

if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs) == false) {
InternalKey key_storage;
InternalKey* next_smallest = &key_storage;
if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs, &next_smallest) ==
false) {
// manual compaction is now multi-threaded, so it can
// happen that ExpandWhileOverlapping fails
// we handle it higher in RunManualCompaction
*manual_conflict = true;
return nullptr;
}

if (covering_the_whole_range) {
if (covering_the_whole_range || !next_smallest) {
*compaction_end = nullptr;
} else {
**compaction_end = *next_smallest;
}

CompactionInputFiles output_level_inputs;
3 changes: 2 additions & 1 deletion db/compaction_picker.h
@@ -151,7 +151,8 @@ class CompactionPicker {
// Will return false if it is impossible to apply this compaction.
bool ExpandInputsToCleanCut(const std::string& cf_name,
VersionStorageInfo* vstorage,
CompactionInputFiles* inputs);
CompactionInputFiles* inputs,
InternalKey** next_smallest = nullptr);

// Returns true if any one of the parent files are being compacted
bool IsRangeInCompaction(VersionStorageInfo* vstorage,
44 changes: 44 additions & 0 deletions db/db_compaction_test.cc
@@ -3961,6 +3961,50 @@ INSTANTIATE_TEST_CASE_P(
CompactionPri::kOldestSmallestSeqFirst,
CompactionPri::kMinOverlappingRatio));

class NoopMergeOperator : public MergeOperator {
public:
NoopMergeOperator() {}

virtual bool FullMergeV2(const MergeOperationInput& /*merge_in*/,
MergeOperationOutput* merge_out) const override {
std::string val("bar");
merge_out->new_value = val;
return true;
}

virtual const char* Name() const override { return "Noop"; }
};

TEST_F(DBCompactionTest, PartialManualCompaction) {
Options opts = CurrentOptions();
opts.num_levels = 3;
opts.level0_file_num_compaction_trigger = 10;
opts.compression = kNoCompression;
opts.merge_operator.reset(new NoopMergeOperator());
opts.target_file_size_base = 10240;
DestroyAndReopen(opts);

Random rnd(301);
for (auto i = 0; i < 8; ++i) {
for (auto j = 0; j < 10; ++j) {
Merge("foo", RandomString(&rnd, 1024));
}
Flush();
}

MoveFilesToLevel(2);

std::string prop;
EXPECT_TRUE(dbfull()->GetProperty(DB::Properties::kLiveSstFilesSize, &prop));
uint64_t max_compaction_bytes = atoi(prop.c_str()) / 2;
ASSERT_OK(dbfull()->SetOptions(
{{"max_compaction_bytes", std::to_string(max_compaction_bytes)}}));

CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
dbfull()->CompactRange(cro, nullptr, nullptr);
}

#endif // !defined(ROCKSDB_LITE)
} // namespace rocksdb

26 changes: 22 additions & 4 deletions db/version_set.cc
@@ -2046,7 +2046,7 @@ bool VersionStorageInfo::OverlapInLevel(int level,
void VersionStorageInfo::GetOverlappingInputs(
int level, const InternalKey* begin, const InternalKey* end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
bool expand_range) const {
bool expand_range, InternalKey** next_smallest) const {
if (level >= num_non_empty_levels_) {
// this level is empty, no overlapping inputs
return;
@@ -2058,11 +2058,17 @@
}
const Comparator* user_cmp = user_comparator_;
if (level > 0) {
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
hint_index, file_index);
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index,
file_index, false, next_smallest);
return;
}

if (next_smallest) {
// next_smallest key only makes sense for non-level 0, where files are
// non-overlapping
*next_smallest = nullptr;
}

Slice user_begin, user_end;
if (begin != nullptr) {
user_begin = begin->user_key();
@@ -2162,7 +2168,7 @@ void VersionStorageInfo::GetCleanInputsWithinInterval(
void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
int level, const InternalKey* begin, const InternalKey* end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
bool within_interval) const {
bool within_interval, InternalKey** next_smallest) const {
assert(level > 0);
int min = 0;
int mid = 0;
@@ -2198,6 +2204,9 @@

// If there were no overlapping files, return immediately.
if (!foundOverlap) {
if (next_smallest) {
next_smallest = nullptr;
}
return;
}
// returns the index where an overlap is found
@@ -2218,6 +2227,15 @@
for (int i = start_index; i <= end_index; i++) {
inputs->push_back(files_[level][i]);
}

if (next_smallest != nullptr) {
// Provide the next key outside the range covered by inputs
if (++end_index < static_cast<int>(files_[level].size())) {
**next_smallest = files_[level][end_index]->smallest;
} else {
*next_smallest = nullptr;
}
}
}

// Store in *start_index and *end_index the range of all files in
13 changes: 8 additions & 5 deletions db/version_set.h
@@ -188,9 +188,11 @@ class VersionStorageInfo {
std::vector<FileMetaData*>* inputs,
int hint_index = -1, // index of overlap file
int* file_index = nullptr, // return index of overlap file
bool expand_range = true) // if set, returns files which overlap the
const; // range and overlap each other. If false,
bool expand_range = true, // if set, returns files which overlap the
// range and overlap each other. If false,
// then just files intersecting the range
InternalKey** next_smallest = nullptr) // if non-null, returns the
const; // smallest key of next file not included
void GetCleanInputsWithinInterval(
int level, const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
@@ -200,14 +202,15 @@
const;

void GetOverlappingInputsRangeBinarySearch(
int level, // level > 0
int level, // level > 0
const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
std::vector<FileMetaData*>* inputs,
int hint_index, // index of overlap file
int* file_index, // return index of overlap file
bool within_interval = false) // if set, force the inputs within interval
const;
bool within_interval = false, // if set, force the inputs within interval
InternalKey** next_smallest = nullptr) // if non-null, returns the
const; // smallest key of next file not included

void ExtendFileRangeOverlappingInterval(
int level,