From d394a6bb48b79b13822702cd5af8a0d85a2fd8f8 Mon Sep 17 00:00:00 2001 From: anand1976 <33647610+anand1976@users.noreply.github.com> Date: Mon, 20 Nov 2017 21:12:55 -0800 Subject: [PATCH] Add a ticker stat for number of keys skipped during iteration Summary: This diff adds a new ticker stat, NUMBER_ITER_SKIP, to count the number of internal keys skipped during iteration. Keys can be skipped due to deletes, or lower sequence number, or higher sequence number than the one requested. Also, fix the issue when StatisticsData is naturally aligned on cacheline boundary, padding becomes a zero size array, which the Windows compiler doesn't like. So add a cacheline worth of padding in that case to keep it happy. We cannot conditionally add padding as gcc doesn't allow using sizeof in preprocessor directives. Closes https://github.com/facebook/rocksdb/pull/3177 Differential Revision: D6353897 Pulled By: anand1976 fbshipit-source-id: 441d5a09af9c4e22e7355242dfc0c7b27aa0a6c2 --- db/db_iter.cc | 13 +++++- db/db_iterator_test.cc | 81 ++++++++++++++++++++++++++++++++++++ include/rocksdb/statistics.h | 4 ++ java/rocksjni/portal.h | 6 ++- monitoring/statistics.h | 3 +- 5 files changed, 103 insertions(+), 4 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index b12fd756ff5..825a6b84097 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -76,6 +76,7 @@ class DBIter final: public Iterator { prev_count_ = 0; prev_found_count_ = 0; bytes_read_ = 0; + skip_count_ = 0; } void BumpGlobalStatistics(Statistics* global_statistics) { @@ -84,6 +85,7 @@ class DBIter final: public Iterator { RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_); RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_); RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_); + RecordTick(global_statistics, NUMBER_ITER_SKIP, skip_count_); PERF_COUNTER_ADD(iter_read_bytes, bytes_read_); ResetCounters(); } @@ -98,6 +100,8 @@ class DBIter final: public Iterator { uint64_t prev_found_count_; // Map to Tickers::ITER_BYTES_READ uint64_t bytes_read_; + // Map to Tickers::NUMBER_ITER_SKIP + uint64_t skip_count_; }; DBIter(Env* _env, const ReadOptions& read_options, @@ -147,6 +151,7 @@ class DBIter final: public Iterator { // Compiler warning issue filed: // https://github.com/facebook/rocksdb/issues/3013 RecordTick(statistics_, NO_ITERATORS, uint64_t(-1)); + ResetInternalKeysSkippedCounter(); local_stats_.BumpGlobalStatistics(statistics_); if (!arena_mode_) { delete iter_; @@ -267,6 +272,10 @@ class DBIter final: public Iterator { } inline void ResetInternalKeysSkippedCounter() { + local_stats_.skip_count_ += num_internal_keys_skipped_; + if (valid_) { + local_stats_.skip_count_--; + } num_internal_keys_skipped_ = 0; } @@ -1143,6 +1152,7 @@ void DBIter::Seek(const Slice& target) { } if (statistics_ != nullptr) { if (valid_) { + // Decrement since we don't want to count this key as skipped RecordTick(statistics_, NUMBER_DB_SEEK_FOUND); RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size()); @@ -1269,7 +1279,8 @@ void DBIter::SeekToLast() { if (!Valid()) { return; } else if (user_comparator_->Equal(*iterate_upper_bound_, key())) { - Prev(); + ReleaseTempPinnedData(); + PrevInternal(); } } else { PrevInternal(); diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc index d62862a953e..746946a28e4 100644 --- a/db/db_iterator_test.cc +++ b/db/db_iterator_test.cc @@ -2076,6 +2076,87 @@ TEST_F(DBIteratorTest, TableFilter) { } } +TEST_F(DBIteratorTest, SkipStatistics) { + Options options = CurrentOptions(); + options.statistics = rocksdb::CreateDBStatistics(); + DestroyAndReopen(options); + + int skip_count = 0; + + // write a bunch of kvs to the database. + ASSERT_OK(Put("a", "1")); + ASSERT_OK(Put("b", "1")); + ASSERT_OK(Put("c", "1")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("d", "1")); + ASSERT_OK(Put("e", "1")); + ASSERT_OK(Put("f", "1")); + ASSERT_OK(Put("a", "2")); + ASSERT_OK(Put("b", "2")); + ASSERT_OK(Flush()); + ASSERT_OK(Delete("d")); + ASSERT_OK(Delete("e")); + ASSERT_OK(Delete("f")); + + Iterator* iter = db_->NewIterator(ReadOptions()); + int count = 0; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ASSERT_OK(iter->status()); + count++; + } + ASSERT_EQ(count, 3); + delete iter; + skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence + ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); + + iter = db_->NewIterator(ReadOptions()); + count = 0; + for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { + ASSERT_OK(iter->status()); + count++; + } + ASSERT_EQ(count, 3); + delete iter; + skip_count += 8; // Same as above, but in reverse order + ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); + + ASSERT_OK(Put("aa", "1")); + ASSERT_OK(Put("ab", "1")); + ASSERT_OK(Put("ac", "1")); + ASSERT_OK(Put("ad", "1")); + ASSERT_OK(Flush()); + ASSERT_OK(Delete("ab")); + ASSERT_OK(Delete("ac")); + ASSERT_OK(Delete("ad")); + + ReadOptions ro; + Slice prefix("b"); + ro.iterate_upper_bound = &prefix; + + iter = db_->NewIterator(ro); + count = 0; + for(iter->Seek("aa"); iter->Valid(); iter->Next()) { + ASSERT_OK(iter->status()); + count++; + } + ASSERT_EQ(count, 1); + delete iter; + skip_count += 6; // 3 deletes + 3 original keys + ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); + + iter = db_->NewIterator(ro); + count = 0; + for(iter->SeekToLast(); iter->Valid(); iter->Prev()) { + ASSERT_OK(iter->status()); + count++; + } + ASSERT_EQ(count, 2); + delete iter; + // 3 deletes + 3 original keys + 2 keys of "b" + lower sequence of "a" + skip_count += 9; + ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 731ff780963..a41ea7d4b44 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -223,6 +223,9 @@ enum Tickers : uint32_t { // Number of refill intervals where rate limiter's bytes are fully consumed. NUMBER_RATE_LIMITER_DRAINS, + // Number of internal keys skipped by Iterator + NUMBER_ITER_SKIP, + TICKER_ENUM_MAX }; @@ -328,6 +331,7 @@ const std::vector> TickersNameMap = { {READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"}, {READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"}, {NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"}, + {NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"}, }; /** diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 81104802756..7fd7874bf87 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -2523,8 +2523,10 @@ class TickerTypeJni { return 0x5B; case rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS: return 0x5C; - case rocksdb::Tickers::TICKER_ENUM_MAX: + case rocksdb::Tickers::NUMBER_ITER_SKIP: return 0x5D; + case rocksdb::Tickers::TICKER_ENUM_MAX: + return 0x5E; default: // undefined/default @@ -2723,6 +2725,8 @@ class TickerTypeJni { case 0x5C: return rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS; case 0x5D: + return rocksdb::Tickers::NUMBER_ITER_SKIP; + case 0x5E: return rocksdb::Tickers::TICKER_ENUM_MAX; default: diff --git a/monitoring/statistics.h b/monitoring/statistics.h index 6e915215deb..3bf20d4cfe0 100644 --- a/monitoring/statistics.h +++ b/monitoring/statistics.h @@ -76,8 +76,7 @@ class StatisticsImpl : public Statistics { padding[(CACHE_LINE_SIZE - (INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) + INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) % - CACHE_LINE_SIZE) % - CACHE_LINE_SIZE] ROCKSDB_FIELD_UNUSED; + CACHE_LINE_SIZE)] ROCKSDB_FIELD_UNUSED; }; static_assert(sizeof(StatisticsData) % 64 == 0, "Expected 64-byte aligned");