Reuse file iterators in tailing iterator when memtable is flushed
Summary:
Under a tailing workload, block cache misses increased whenever a memtable
was flushed, because the version set had changed and we rebuilt all the
file iterators in that case. This was exacerbated when iterate_upper_bound
was set: file iterators beyond the iterate_upper_bound had already been
trimmed, only to be brought back by the rebuild and then deleted again.
We now renew the iterators instead, building iterators only for files that
were added and deleting file iterators only for files that were removed.
Refer to https://reviews.facebook.net/D50463 for the previous version.
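
To make the renewal step concrete, below is a minimal, self-contained
sketch of the reuse logic. The names and types (FileIter, RenewL0Iters,
plain integer file numbers) are illustrative stand-ins rather than
RocksDB's API; the real change is ForwardIterator::RenewIterators() in
db/forward_iterator.cc further down.

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Illustrative stand-in for an open file/table iterator.
struct FileIter {
  explicit FileIter(uint64_t fn) : file_number(fn) {}
  uint64_t file_number;
};

// Given the old and new lists of L0 file numbers, reuse the iterator of any
// file present in both versions, open iterators only for added files, and
// let iterators of removed files be destroyed with the old vector.
std::vector<std::unique_ptr<FileIter>> RenewL0Iters(
    const std::vector<uint64_t>& old_files,
    const std::vector<uint64_t>& new_files,
    std::vector<std::unique_ptr<FileIter>>* old_iters) {
  std::vector<std::unique_ptr<FileIter>> new_iters;
  new_iters.reserve(new_files.size());
  for (uint64_t fn : new_files) {
    bool found = false;
    for (std::size_t i = 0; i < old_files.size(); ++i) {
      if (old_files[i] == fn) {
        // Reuse the existing iterator; it may be null if it had been trimmed
        // by iterate_upper_bound, and the null is carried over, not rebuilt.
        new_iters.push_back(std::move((*old_iters)[i]));
        found = true;
        break;
      }
    }
    if (!found) {
      // File is new in this version: open a fresh iterator for it.
      new_iters.push_back(std::make_unique<FileIter>(fn));
    }
  }
  // Iterators still owned by *old_iters belong to files that were removed;
  // they are freed when the caller drops the old vector.
  return new_iters;
}

Because files are matched by identity, an L0 file that survives a flush
keeps its open iterator, so its blocks stay warm in the block cache instead
of being re-read after a full rebuild.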

Test Plan: DBTestTailingIterator.TailingIteratorTrimSeekToNext

Reviewers: anthony, IslamAbdelRahman, igor, tnovak, yhchiang, sdong

Reviewed By: sdong

Subscribers: yhchiang, march, dhruba, leveldb, lovro

Differential Revision: https://reviews.facebook.net/D50679
rven1 committed Nov 13, 2015
1 parent 2ae4d7d commit 7824444
Showing 7 changed files with 501 additions and 28 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -285,6 +285,7 @@ target_link_libraries(rocksdb${ARTIFACT_SUFFIX} ${LIBS})

set(APPS
db/db_bench.cc
db/forward_iterator_bench.cc
db/memtablerep_bench.cc
table/table_reader_bench.cc
tools/db_stress.cc
5 changes: 4 additions & 1 deletion Makefile
@@ -337,7 +337,7 @@ TOOLS = \
rocksdb_dump \
rocksdb_undump

BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench
BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench forward_iterator_bench

# if user didn't config LIBNAME, set the default
ifeq ($(LIBNAME),)
@@ -978,6 +978,9 @@ ldb_cmd_test: tools/ldb_cmd_test.o $(LIBOBJECTS) $(TESTHARNESS)
ldb: tools/ldb.o $(LIBOBJECTS)
$(AM_LINK)

forward_iterator_bench: db/forward_iterator_bench.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

#-------------------------------------------------
# make install related stuff
INSTALL_PATH ?= /usr/local
10 changes: 10 additions & 0 deletions db/db_tailing_iter_test.cc
@@ -140,6 +140,8 @@ TEST_F(DBTestTailingIterator, TailingIteratorTrimSeekToNext) {
std::unique_ptr<Iterator> iterh(db_->NewIterator(read_options, handles_[1]));
std::string value(1024, 'a');
bool file_iters_deleted = false;
bool file_iters_renewed_null = false;
bool file_iters_renewed_copy = false;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"ForwardIterator::SeekInternal:Return", [&](void* arg) {
ForwardIterator* fiter = reinterpret_cast<ForwardIterator*>(arg);
@@ -152,6 +154,12 @@ TEST_F(DBTestTailingIterator, TailingIteratorTrimSeekToNext) {
ASSERT_TRUE(!file_iters_deleted ||
fiter->TEST_CheckDeletedIters(&deleted_iters, &num_iters));
});
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"ForwardIterator::RenewIterators:Null",
[&](void* arg) { file_iters_renewed_null = true; });
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"ForwardIterator::RenewIterators:Copy",
[&](void* arg) { file_iters_renewed_copy = true; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
const int num_records = 1000;
for (int i = 1; i < num_records; ++i) {
@@ -203,6 +211,8 @@ TEST_F(DBTestTailingIterator, TailingIteratorTrimSeekToNext) {
ASSERT_LE(num_iters, 1);
file_iters_deleted = false;
}
ASSERT_TRUE(file_iters_renewed_null);
ASSERT_TRUE(file_iters_renewed_copy);
iter = 0;
itern = 0;
iterh = 0;
133 changes: 106 additions & 27 deletions db/forward_iterator.cc
@@ -144,6 +144,23 @@ ForwardIterator::~ForwardIterator() {
Cleanup(true);
}

void ForwardIterator::SVCleanup() {
if (sv_ != nullptr && sv_->Unref()) {
// Job id == 0 means that this is not our background process, but rather
// user thread
JobContext job_context(0);
db_->mutex_.Lock();
sv_->Cleanup();
db_->FindObsoleteFiles(&job_context, false, true);
db_->mutex_.Unlock();
delete sv_;
if (job_context.HaveSomethingToDelete()) {
db_->PurgeObsoleteFiles(job_context);
}
job_context.Clean();
}
}

void ForwardIterator::Cleanup(bool release_sv) {
if (mutable_iter_ != nullptr) {
mutable_iter_->~InternalIterator();
@@ -162,20 +179,7 @@ void ForwardIterator::Cleanup(bool release_sv) {
level_iters_.clear();

if (release_sv) {
if (sv_ != nullptr && sv_->Unref()) {
// Job id == 0 means that this is not our background process, but rather
// user thread
JobContext job_context(0);
db_->mutex_.Lock();
sv_->Cleanup();
db_->FindObsoleteFiles(&job_context, false, true);
db_->mutex_.Unlock();
delete sv_;
if (job_context.HaveSomethingToDelete()) {
db_->PurgeObsoleteFiles(job_context);
}
job_context.Clean();
}
SVCleanup();
}
}

@@ -185,9 +189,10 @@ bool ForwardIterator::Valid() const {
}

void ForwardIterator::SeekToFirst() {
if (sv_ == nullptr ||
sv_ ->version_number != cfd_->GetSuperVersionNumber()) {
if (sv_ == nullptr) {
RebuildIterators(true);
} else if (sv_->version_number != cfd_->GetSuperVersionNumber()) {
RenewIterators();
} else if (immutable_status_.IsIncomplete()) {
ResetIncompleteIterators();
}
@@ -205,9 +210,10 @@ void ForwardIterator::Seek(const Slice& internal_key) {
if (IsOverUpperBound(internal_key)) {
valid_ = false;
}
if (sv_ == nullptr ||
sv_ ->version_number != cfd_->GetSuperVersionNumber()) {
if (sv_ == nullptr) {
RebuildIterators(true);
} else if (sv_->version_number != cfd_->GetSuperVersionNumber()) {
RenewIterators();
} else if (immutable_status_.IsIncomplete()) {
ResetIncompleteIterators();
}
@@ -227,7 +233,9 @@ void ForwardIterator::SeekInternal(const Slice& internal_key,
// an option to turn it off.
if (seek_to_first || NeedToSeekImmutable(internal_key)) {
immutable_status_ = Status::OK();
if (has_iter_trimmed_for_upper_bound_) {
if ((has_iter_trimmed_for_upper_bound_) &&
(cfd_->internal_comparator().InternalKeyComparator::Compare(
prev_key_.GetKey(), internal_key) > 0)) {
// Some iterators are trimmed. Need to rebuild.
RebuildIterators(true);
// Already seeked mutable iter, so seek again
@@ -393,7 +401,11 @@ void ForwardIterator::Next() {
std::string current_key = key().ToString();
Slice old_key(current_key.data(), current_key.size());

RebuildIterators(true);
if (sv_ == nullptr) {
RebuildIterators(true);
} else {
RenewIterators();
}
SeekInternal(old_key, false);
if (!valid_ || key().compare(old_key) != 0) {
return;
@@ -485,26 +497,93 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) {
read_options_, *cfd_->soptions(), cfd_->internal_comparator(), l0->fd));
}
level_iters_.reserve(vstorage->num_levels() - 1);
BuildLevelIterators(vstorage);
current_ = nullptr;
is_prev_set_ = false;
}

void ForwardIterator::RenewIterators() {
SuperVersion* svnew;
assert(sv_);
svnew = cfd_->GetReferencedSuperVersion(&(db_->mutex_));

if (mutable_iter_ != nullptr) {
mutable_iter_->~InternalIterator();
}
for (auto* m : imm_iters_) {
m->~InternalIterator();
}
imm_iters_.clear();

mutable_iter_ = svnew->mem->NewIterator(read_options_, &arena_);
svnew->imm->AddIterators(read_options_, &imm_iters_, &arena_);

const auto* vstorage = sv_->current->storage_info();
const auto& l0_files = vstorage->LevelFiles(0);
const auto* vstorage_new = svnew->current->storage_info();
const auto& l0_files_new = vstorage_new->LevelFiles(0);
uint32_t iold, inew;
bool found;
std::vector<InternalIterator*> l0_iters_new;
l0_iters_new.reserve(l0_files_new.size());

for (inew = 0; inew < l0_files_new.size(); inew++) {
found = false;
for (iold = 0; iold < l0_files.size(); iold++) {
if (l0_files[iold] == l0_files_new[inew]) {
found = true;
break;
}
}
if (found) {
if (l0_iters_[iold] == nullptr) {
l0_iters_new.push_back(nullptr);
TEST_SYNC_POINT_CALLBACK("ForwardIterator::RenewIterators:Null", this);
} else {
l0_iters_new.push_back(l0_iters_[iold]);
l0_iters_[iold] = nullptr;
TEST_SYNC_POINT_CALLBACK("ForwardIterator::RenewIterators:Copy", this);
}
continue;
}
l0_iters_new.push_back(cfd_->table_cache()->NewIterator(
read_options_, *cfd_->soptions(), cfd_->internal_comparator(),
l0_files_new[inew]->fd));
}

for (auto* f : l0_iters_) {
delete f;
}
l0_iters_.clear();
l0_iters_ = l0_iters_new;

for (auto* l : level_iters_) {
delete l;
}
BuildLevelIterators(vstorage_new);
current_ = nullptr;
is_prev_set_ = false;
SVCleanup();
sv_ = svnew;
}

void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage) {
for (int32_t level = 1; level < vstorage->num_levels(); ++level) {
const auto& level_files = vstorage->LevelFiles(level);

if ((level_files.empty()) ||
((read_options_.iterate_upper_bound != nullptr) &&
(user_comparator_->Compare(*read_options_.iterate_upper_bound,
level_files[0]->smallest.user_key()) <
0))) {
level_iters_.push_back(nullptr);
level_iters_[level - 1] = nullptr;
if (!level_files.empty()) {
has_iter_trimmed_for_upper_bound_ = true;
}
} else {
level_iters_.push_back(
new LevelIterator(cfd_, read_options_, level_files));
level_iters_[level - 1] =
new LevelIterator(cfd_, read_options_, level_files);
}
}

current_ = nullptr;
is_prev_set_ = false;
}

void ForwardIterator::ResetIncompleteIterators() {
4 changes: 4 additions & 0 deletions db/forward_iterator.h
@@ -24,6 +24,7 @@ class Env;
struct SuperVersion;
class ColumnFamilyData;
class LevelIterator;
class VersionStorageInfo;
struct FileMetaData;

class MinIterComparator {
@@ -74,7 +75,10 @@ class ForwardIterator : public InternalIterator {

private:
void Cleanup(bool release_sv);
void SVCleanup();
void RebuildIterators(bool refresh_sv);
void RenewIterators();
void BuildLevelIterators(const VersionStorageInfo* vstorage);
void ResetIncompleteIterators();
void SeekInternal(const Slice& internal_key, bool seek_to_first);
void UpdateCurrent();
