Skip to content

Commit

Permalink
new Prev() prefix support using SeekForPrev()
Browse files Browse the repository at this point in the history
Summary:
1) The previous solution for Prev() prefix support is not clean.
Since I add api SeekForPrev(), now the Prev() can be symmetric to Next().
and we do not need SeekToLast() to be called in Prev() any more.

Also, Next() will Seek(prefix_seek_key_) to solve the problem of possible inconsistency between db_iter and merge_iter when
there is merge_operator. And prefix_seek_key is only refreshed when change direction to forward.

2) This diff also solves the bug of Iterator::SeekToLast() with iterate_upper_bound_ with prefix extractor.

add test cases for the above two cases.

There are some tests for the SeekToLast() in Prev(), I will clean them later.

Test Plan: make all check

Reviewers: IslamAbdelRahman, andrewkr, yiwu, sdong

Reviewed By: sdong

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D63933
  • Loading branch information
lightmark committed Oct 11, 2016
1 parent 991b585 commit 447f171
Show file tree
Hide file tree
Showing 7 changed files with 310 additions and 62 deletions.
3 changes: 2 additions & 1 deletion db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4345,7 +4345,8 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
env_, *cfd->ioptions(), cfd->user_comparator(), snapshot,
sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number, read_options.iterate_upper_bound,
read_options.prefix_same_as_start, read_options.pin_data);
read_options.prefix_same_as_start, read_options.pin_data,
read_options.total_order_seek);

InternalIterator* internal_iter =
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena());
Expand Down
97 changes: 54 additions & 43 deletions db/db_iter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ class DBIter: public Iterator {
InternalIterator* iter, SequenceNumber s, bool arena_mode,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false)
bool prefix_same_as_start = false, bool pin_data = false,
bool total_order_seek = false)
: arena_mode_(arena_mode),
env_(env),
logger_(ioptions.info_log),
Expand All @@ -120,7 +121,8 @@ class DBIter: public Iterator {
version_number_(version_number),
iterate_upper_bound_(iterate_upper_bound),
prefix_same_as_start_(prefix_same_as_start),
pin_thru_lifetime_(pin_data) {
pin_thru_lifetime_(pin_data),
total_order_seek_(total_order_seek) {
RecordTick(statistics_, NO_ITERATORS);
prefix_extractor_ = ioptions.prefix_extractor;
max_skip_ = max_sequential_skip_in_iterations;
Expand Down Expand Up @@ -204,6 +206,7 @@ class DBIter: public Iterator {
virtual void SeekToLast() override;

private:
void ReverseToForward();
void ReverseToBackward();
void PrevInternal();
void FindParseableKey(ParsedInternalKey* ikey, Direction direction);
Expand Down Expand Up @@ -256,6 +259,7 @@ class DBIter: public Iterator {
Direction direction_;
bool valid_;
bool current_entry_is_merged_;
// for prefix seek mode to support prev()
Statistics* statistics_;
uint64_t max_skip_;
uint64_t version_number_;
Expand All @@ -266,6 +270,7 @@ class DBIter: public Iterator {
// Means that we will pin all data blocks we read as long the Iterator
// is not deleted, will be true if ReadOptions::pin_data is true
const bool pin_thru_lifetime_;
const bool total_order_seek_;
// List of operands for merge operator.
MergeContext merge_context_;
LocalStatistics local_stats_;
Expand Down Expand Up @@ -294,11 +299,7 @@ void DBIter::Next() {
// Release temporarily pinned blocks from last operation
ReleaseTempPinnedData();
if (direction_ == kReverse) {
FindNextUserKey();
direction_ = kForward;
if (!iter_->Valid()) {
iter_->SeekToFirst();
}
ReverseToForward();
} else if (iter_->Valid() && !current_entry_is_merged_) {
// If the current value is not a merge, the iter position is the
// current key, which is already returned. We can safely issue a
Expand Down Expand Up @@ -506,7 +507,29 @@ void DBIter::Prev() {
}
}

void DBIter::ReverseToForward() {
if (prefix_extractor_ != nullptr && !total_order_seek_) {
IterKey last_key;
last_key.SetInternalKey(ParsedInternalKey(
saved_key_.GetKey(), kMaxSequenceNumber, kValueTypeForSeek));
Slice db_iter_key = last_key.GetKey();
iter_->ResetPrefixSeekKey(&db_iter_key);
}
FindNextUserKey();
direction_ = kForward;
if (!iter_->Valid()) {
iter_->SeekToFirst();
}
}

void DBIter::ReverseToBackward() {
if (prefix_extractor_ != nullptr && !total_order_seek_) {
IterKey last_key;
last_key.SetInternalKey(
ParsedInternalKey(saved_key_.GetKey(), 0, kValueTypeForSeekForPrev));
Slice db_iter_key = last_key.GetKey();
iter_->ResetPrefixSeekKey(&db_iter_key);
}
if (current_entry_is_merged_) {
// Not placed in the same key. Need to call Prev() until finding the
// previous key.
Expand Down Expand Up @@ -794,7 +817,6 @@ void DBIter::Seek(const Slice& target) {
PERF_TIMER_GUARD(seek_internal_seek_time);
iter_->Seek(saved_key_.GetKey());
}

RecordTick(statistics_, NUMBER_DB_SEEK);
if (iter_->Valid()) {
if (prefix_extractor_ && prefix_same_as_start_) {
Expand All @@ -815,6 +837,7 @@ void DBIter::Seek(const Slice& target) {
} else {
valid_ = false;
}

if (valid_ && prefix_extractor_ && prefix_same_as_start_) {
prefix_start_buf_.SetKey(prefix_start_key_);
prefix_start_key_ = prefix_start_buf_.GetKey();
Expand Down Expand Up @@ -911,25 +934,15 @@ void DBIter::SeekToLast() {
// it will seek to the last key before the
// ReadOptions.iterate_upper_bound
if (iter_->Valid() && iterate_upper_bound_ != nullptr) {
saved_key_.SetKey(*iterate_upper_bound_, false /* copy */);
std::string last_key;
AppendInternalKey(&last_key,
ParsedInternalKey(saved_key_.GetKey(), kMaxSequenceNumber,
kValueTypeForSeek));

iter_->Seek(last_key);

if (!iter_->Valid()) {
iter_->SeekToLast();
} else {
iter_->Prev();
if (!iter_->Valid()) {
valid_ = false;
return;
}
SeekForPrev(*iterate_upper_bound_);
if (!Valid()) {
return;
} else if (user_comparator_->Equal(*iterate_upper_bound_, key())) {
Prev();
}
} else {
PrevInternal();
}
PrevInternal();
if (statistics_ != nullptr) {
RecordTick(statistics_, NUMBER_DB_SEEK);
if (valid_) {
Expand All @@ -943,18 +956,16 @@ void DBIter::SeekToLast() {
}
}

Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& ioptions,
const Comparator* user_key_comparator,
InternalIterator* internal_iter,
const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations,
uint64_t version_number,
const Slice* iterate_upper_bound,
bool prefix_same_as_start, bool pin_data) {
DBIter* db_iter =
new DBIter(env, ioptions, user_key_comparator, internal_iter, sequence,
false, max_sequential_skip_in_iterations, version_number,
iterate_upper_bound, prefix_same_as_start, pin_data);
Iterator* NewDBIterator(
Env* env, const ImmutableCFOptions& ioptions,
const Comparator* user_key_comparator, InternalIterator* internal_iter,
const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
uint64_t version_number, const Slice* iterate_upper_bound,
bool prefix_same_as_start, bool pin_data, bool total_order_seek) {
DBIter* db_iter = new DBIter(
env, ioptions, user_key_comparator, internal_iter, sequence, false,
max_sequential_skip_in_iterations, version_number, iterate_upper_bound,
prefix_same_as_start, pin_data, total_order_seek);
return db_iter;
}

Expand Down Expand Up @@ -993,15 +1004,15 @@ ArenaWrappedDBIter* NewArenaWrappedDbIterator(
Env* env, const ImmutableCFOptions& ioptions,
const Comparator* user_key_comparator, const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound, bool prefix_same_as_start,
bool pin_data) {
const Slice* iterate_upper_bound, bool prefix_same_as_start, bool pin_data,
bool total_order_seek) {
ArenaWrappedDBIter* iter = new ArenaWrappedDBIter();
Arena* arena = iter->GetArena();
auto mem = arena->AllocateAligned(sizeof(DBIter));
DBIter* db_iter =
new (mem) DBIter(env, ioptions, user_key_comparator, nullptr, sequence,
true, max_sequential_skip_in_iterations, version_number,
iterate_upper_bound, prefix_same_as_start, pin_data);
DBIter* db_iter = new (mem) DBIter(
env, ioptions, user_key_comparator, nullptr, sequence, true,
max_sequential_skip_in_iterations, version_number, iterate_upper_bound,
prefix_same_as_start, pin_data, total_order_seek);

iter->SetDBIter(db_iter);

Expand Down
6 changes: 4 additions & 2 deletions db/db_iter.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ extern Iterator* NewDBIterator(
const Comparator* user_key_comparator, InternalIterator* internal_iter,
const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
uint64_t version_number, const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false);
bool prefix_same_as_start = false, bool pin_data = false,
bool total_order_seek = false);

// A wrapper iterator which wraps DB Iterator and the arena, with which the DB
// iterator is supposed be allocated. This class is used as an entry point of
Expand Down Expand Up @@ -78,6 +79,7 @@ extern ArenaWrappedDBIter* NewArenaWrappedDbIterator(
const Comparator* user_key_comparator, const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false);
bool prefix_same_as_start = false, bool pin_data = false,
bool total_order_seek = false);

} // namespace rocksdb
4 changes: 2 additions & 2 deletions db/db_iter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
db_iter->SeekToLast();

ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(static_cast<int>(perf_context.internal_key_skipped_count), 1);
ASSERT_EQ(static_cast<int>(perf_context.internal_key_skipped_count), 7);
ASSERT_EQ(db_iter->key().ToString(), "b");

SetPerfLevel(kDisable);
Expand Down Expand Up @@ -480,7 +480,7 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) {
db_iter->SeekToLast();

ASSERT_TRUE(db_iter->Valid());
ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 0);
ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 1);
ASSERT_EQ(db_iter->key().ToString(), "b");

SetPerfLevel(kDisable);
Expand Down
Loading

0 comments on commit 447f171

Please sign in to comment.