Skip to content

Commit

Permalink
Persistent Stats: persist stats history to disk (facebook#5046)
Browse files Browse the repository at this point in the history
Summary:
This PR continues the work in facebook#4748 and facebook#4535 by adding a new DBOption `persist_stats_to_disk` which instructs RocksDB to persist stats history to RocksDB itself. When statistics is enabled, and  both options `stats_persist_period_sec` and `persist_stats_to_disk` are set, RocksDB will periodically write stats to a built-in column family in the following form: key -> (timestamp in microseconds)#(stats name), value -> stats value. The existing API `GetStatsHistory` will detect the current value of `persist_stats_to_disk` and either read from in-memory data structure or from the hidden column family on disk.
Pull Request resolved: facebook#5046

Differential Revision: D15863138

Pulled By: miasantreble

fbshipit-source-id: bb82abdb3f2ca581aa42531734ac799f113e931b
  • Loading branch information
miasantreble authored and facebook-github-bot committed Jun 17, 2019
1 parent ee294c2 commit 671d15c
Show file tree
Hide file tree
Showing 25 changed files with 1,143 additions and 332 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,6 @@ set(SOURCES
db/flush_scheduler.cc
db/forward_iterator.cc
db/internal_stats.cc
db/in_memory_stats_history.cc
db/logs_with_prep_tracker.cc
db/log_reader.cc
db/log_writer.cc
Expand Down Expand Up @@ -568,10 +567,12 @@ set(SOURCES
memtable/write_buffer_manager.cc
monitoring/histogram.cc
monitoring/histogram_windowing.cc
monitoring/in_memory_stats_history.cc
monitoring/instrumented_mutex.cc
monitoring/iostats_context.cc
monitoring/perf_context.cc
monitoring/perf_level.cc
monitoring/persistent_stats_history.cc
monitoring/statistics.cc
monitoring/thread_status_impl.cc
monitoring/thread_status_updater.cc
Expand Down Expand Up @@ -955,6 +956,7 @@ if(WITH_TESTS)
monitoring/histogram_test.cc
monitoring/iostats_context_test.cc
monitoring/statistics_test.cc
monitoring/stats_history_test.cc
options/options_settable_test.cc
options/options_test.cc
table/block_based/block_based_filter_block_test.cc
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ TESTS = \
ldb_cmd_test \
persistent_cache_test \
statistics_test \
stats_history_test \
lru_cache_test \
object_registry_test \
repair_test \
Expand Down Expand Up @@ -1566,6 +1567,9 @@ persistent_cache_test: utilities/persistent_cache/persistent_cache_test.o db/db
statistics_test: monitoring/statistics_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

stats_history_test: monitoring/stats_history_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

lru_cache_test: cache/lru_cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

Expand Down
8 changes: 7 additions & 1 deletion TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ cpp_library(
"db/flush_job.cc",
"db/flush_scheduler.cc",
"db/forward_iterator.cc",
"db/in_memory_stats_history.cc",
"db/internal_stats.cc",
"db/log_reader.cc",
"db/log_writer.cc",
Expand Down Expand Up @@ -163,10 +162,12 @@ cpp_library(
"memtable/write_buffer_manager.cc",
"monitoring/histogram.cc",
"monitoring/histogram_windowing.cc",
"monitoring/in_memory_stats_history.cc",
"monitoring/instrumented_mutex.cc",
"monitoring/iostats_context.cc",
"monitoring/perf_context.cc",
"monitoring/perf_level.cc",
"monitoring/persistent_stats_history.cc",
"monitoring/statistics.cc",
"monitoring/thread_status_impl.cc",
"monitoring/thread_status_updater.cc",
Expand Down Expand Up @@ -971,6 +972,11 @@ ROCKS_TESTS = [
"monitoring/statistics_test.cc",
"serial",
],
[
"stats_history_test",
"monitoring/stats_history_test.cc",
"serial",
],
[
"stringappend_test",
"utilities/merge_operators/string_append/stringappend_test.cc",
Expand Down
91 changes: 72 additions & 19 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
#include "db/external_sst_file_ingestion_job.h"
#include "db/flush_job.h"
#include "db/forward_iterator.h"
#include "db/in_memory_stats_history.h"
#include "db/job_context.h"
#include "db/log_reader.h"
#include "db/log_writer.h"
Expand All @@ -58,8 +57,10 @@
#include "logging/logging.h"
#include "memtable/hash_linklist_rep.h"
#include "memtable/hash_skiplist_rep.h"
#include "monitoring/in_memory_stats_history.h"
#include "monitoring/iostats_context_imp.h"
#include "monitoring/perf_context_imp.h"
#include "monitoring/persistent_stats_history.h"
#include "monitoring/thread_status_updater.h"
#include "monitoring/thread_status_util.h"
#include "options/cf_options.h"
Expand Down Expand Up @@ -98,6 +99,9 @@

namespace rocksdb {
const std::string kDefaultColumnFamilyName("default");
const std::string kPersistentStatsColumnFamilyName(
"___rocksdb_stats_history___");
const int kMicrosInSecond = 1000 * 1000;
void DumpRocksDBBuildVersion(Logger* log);

CompressionType GetCompressionFlush(
Expand Down Expand Up @@ -162,6 +166,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
logfile_number_(0),
log_dir_synced_(false),
log_empty_(true),
persist_stats_cf_handle_(nullptr),
log_sync_cv_(&mutex_),
total_log_size_(0),
is_snapshot_supported_(true),
Expand Down Expand Up @@ -482,10 +487,17 @@ Status DBImpl::CloseHelper() {
}
}

if (default_cf_handle_ != nullptr) {
if (default_cf_handle_ != nullptr || persist_stats_cf_handle_ != nullptr) {
// we need to delete handle outside of lock because it does its own locking
mutex_.Unlock();
delete default_cf_handle_;
if (default_cf_handle_) {
delete default_cf_handle_;
default_cf_handle_ = nullptr;
}
if (persist_stats_cf_handle_) {
delete persist_stats_cf_handle_;
persist_stats_cf_handle_ = nullptr;
}
mutex_.Lock();
}

Expand Down Expand Up @@ -634,22 +646,22 @@ void DBImpl::StartTimedTasks() {
if (!thread_dump_stats_) {
thread_dump_stats_.reset(new rocksdb::RepeatableThread(
[this]() { DBImpl::DumpStats(); }, "dump_st", env_,
stats_dump_period_sec * 1000000));
static_cast<uint64_t>(stats_dump_period_sec) * kMicrosInSecond));
}
}
stats_persist_period_sec = mutable_db_options_.stats_persist_period_sec;
if (stats_persist_period_sec > 0) {
if (!thread_persist_stats_) {
thread_persist_stats_.reset(new rocksdb::RepeatableThread(
[this]() { DBImpl::PersistStats(); }, "pst_st", env_,
stats_persist_period_sec * 1000000));
static_cast<uint64_t>(stats_persist_period_sec) * kMicrosInSecond));
}
}
}
}

// esitmate the total size of stats_history_
size_t DBImpl::EstiamteStatsHistorySize() const {
size_t DBImpl::EstimateInMemoryStatsHistorySize() const {
size_t size_total =
sizeof(std::map<uint64_t, std::map<std::string, uint64_t>>);
if (stats_history_.size() == 0) return size_total;
Expand All @@ -671,7 +683,7 @@ void DBImpl::PersistStats() {
if (shutdown_initiated_) {
return;
}
uint64_t now_micros = env_->NowMicros();
uint64_t now_seconds = env_->NowMicros() / kMicrosInSecond;
Statistics* statistics = immutable_db_options_.statistics.get();
if (!statistics) {
return;
Expand All @@ -682,12 +694,40 @@ void DBImpl::PersistStats() {
stats_history_size_limit = mutable_db_options_.stats_history_buffer_size;
}

// TODO(Zhongyi): also persist immutable_db_options_.statistics
{
std::map<std::string, uint64_t> stats_map;
if (!statistics->getTickerMap(&stats_map)) {
return;
std::map<std::string, uint64_t> stats_map;
if (!statistics->getTickerMap(&stats_map)) {
return;
}

if (immutable_db_options_.persist_stats_to_disk) {
WriteBatch batch;
if (stats_slice_initialized_) {
for (const auto& stat : stats_map) {
char key[100];
int length =
EncodePersistentStatsKey(now_seconds, stat.first, 100, key);
// calculate the delta from last time
if (stats_slice_.find(stat.first) != stats_slice_.end()) {
uint64_t delta = stat.second - stats_slice_[stat.first];
batch.Put(persist_stats_cf_handle_, Slice(key, std::min(100, length)),
ToString(delta));
}
}
}
stats_slice_initialized_ = true;
std::swap(stats_slice_, stats_map);
WriteOptions wo;
wo.low_pri = true;
wo.no_slowdown = true;
wo.sync = false;
Status s = Write(wo, &batch);
if (!s.ok()) {
ROCKS_LOG_INFO(immutable_db_options_.info_log,
"Writing to persistent stats CF failed -- %s\n",
s.ToString().c_str());
}
// TODO(Zhongyi): add purging for persisted data
} else {
InstrumentedMutexLock l(&stats_history_mutex_);
// calculate the delta from last time
if (stats_slice_initialized_) {
Expand All @@ -697,17 +737,19 @@ void DBImpl::PersistStats() {
stats_delta[stat.first] = stat.second - stats_slice_[stat.first];
}
}
stats_history_[now_micros] = stats_delta;
stats_history_[now_seconds] = stats_delta;
}
stats_slice_initialized_ = true;
std::swap(stats_slice_, stats_map);
TEST_SYNC_POINT("DBImpl::PersistStats:StatsCopied");

// delete older stats snapshots to control memory consumption
bool purge_needed = EstiamteStatsHistorySize() > stats_history_size_limit;
bool purge_needed =
EstimateInMemoryStatsHistorySize() > stats_history_size_limit;
while (purge_needed && !stats_history_.empty()) {
stats_history_.erase(stats_history_.begin());
purge_needed = EstiamteStatsHistorySize() > stats_history_size_limit;
purge_needed =
EstimateInMemoryStatsHistorySize() > stats_history_size_limit;
}
}
// TODO: persist stats to disk
Expand Down Expand Up @@ -741,8 +783,13 @@ Status DBImpl::GetStatsHistory(
if (!stats_iterator) {
return Status::InvalidArgument("stats_iterator not preallocated.");
}
stats_iterator->reset(
new InMemoryStatsHistoryIterator(start_time, end_time, this));
if (immutable_db_options_.persist_stats_to_disk) {
stats_iterator->reset(
new PersistentStatsHistoryIterator(start_time, end_time, this));
} else {
stats_iterator->reset(
new InMemoryStatsHistoryIterator(start_time, end_time, this));
}
return (*stats_iterator)->status();
}

Expand Down Expand Up @@ -946,7 +993,8 @@ Status DBImpl::SetDBOptions(
if (new_options.stats_dump_period_sec > 0) {
thread_dump_stats_.reset(new rocksdb::RepeatableThread(
[this]() { DBImpl::DumpStats(); }, "dump_st", env_,
new_options.stats_dump_period_sec * 1000000));
static_cast<uint64_t>(new_options.stats_dump_period_sec) *
kMicrosInSecond));
} else {
thread_dump_stats_.reset();
}
Expand All @@ -961,7 +1009,8 @@ Status DBImpl::SetDBOptions(
if (new_options.stats_persist_period_sec > 0) {
thread_persist_stats_.reset(new rocksdb::RepeatableThread(
[this]() { DBImpl::PersistStats(); }, "pst_st", env_,
new_options.stats_persist_period_sec * 1000000));
static_cast<uint64_t>(new_options.stats_persist_period_sec) *
kMicrosInSecond));
} else {
thread_persist_stats_.reset();
}
Expand Down Expand Up @@ -1373,6 +1422,10 @@ ColumnFamilyHandle* DBImpl::DefaultColumnFamily() const {
return default_cf_handle_;
}

ColumnFamilyHandle* DBImpl::PersistentStatsColumnFamily() const {
return persist_stats_cf_handle_;
}

Status DBImpl::Get(const ReadOptions& read_options,
ColumnFamilyHandle* column_family, const Slice& key,
PinnableSlice* value) {
Expand Down
26 changes: 24 additions & 2 deletions db/db_impl/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class Arena;
class ArenaWrappedDBIter;
class InMemoryStatsHistoryIterator;
class MemTable;
class PersistentStatsHistoryIterator;
class TableCache;
class TaskLimiterToken;
class Version;
Expand Down Expand Up @@ -268,6 +269,8 @@ class DBImpl : public DB {

ColumnFamilyHandle* DefaultColumnFamily() const override;

ColumnFamilyHandle* PersistentStatsColumnFamily() const;

virtual Status Close() override;

Status GetStatsHistory(
Expand Down Expand Up @@ -822,7 +825,7 @@ class DBImpl : public DB {
void TEST_WaitForDumpStatsRun(std::function<void()> callback) const;
void TEST_WaitForPersistStatsRun(std::function<void()> callback) const;
bool TEST_IsPersistentStatsEnabled() const;
size_t TEST_EstiamteStatsHistorySize() const;
size_t TEST_EstimateInMemoryStatsHistorySize() const;

#endif // NDEBUG

Expand Down Expand Up @@ -1016,6 +1019,7 @@ class DBImpl : public DB {
friend class DBTest_MixedSlowdownOptionsStop_Test;
friend class DBCompactionTest_CompactBottomLevelFilesWithDeletions_Test;
friend class DBCompactionTest_CompactionDuringShutdown_Test;
friend class StatsHistoryTest_PersistentStatsCreateColumnFamilies_Test;
#ifndef NDEBUG
friend class DBTest2_ReadCallbackTest_Test;
friend class WriteCallbackTest_WriteWithCallbackTest_Test;
Expand Down Expand Up @@ -1176,6 +1180,21 @@ class DBImpl : public DB {
PrepickedCompaction* prepicked_compaction;
};

// Initialize the built-in column family for persistent stats. Depending on
// whether on-disk persistent stats have been enabled before, it may either
// create a new column family and column family handle or just a column family
// handle.
// Required: DB mutex held
Status InitPersistStatsColumnFamily();

// Persistent Stats column family has two format version key which are used
// for compatibility check. Write format version if it's created for the
// first time, read format version and check compatibility if recovering
// from disk. This function requires DB mutex held at entrance but may
// release and re-acquire DB mutex in the process.
// Required: DB mutex held
Status PersistentStatsProcessFormatVersion();

Status ResumeImpl();

void MaybeIgnoreError(Status* s) const;
Expand Down Expand Up @@ -1424,7 +1443,7 @@ class DBImpl : public DB {

void PrintStatistics();

size_t EstiamteStatsHistorySize() const;
size_t EstimateInMemoryStatsHistorySize() const;

// persist stats to column family "_persistent_stats"
void PersistStats();
Expand Down Expand Up @@ -1571,6 +1590,9 @@ class DBImpl : public DB {
// expesnive mutex_ lock during WAL write, which update log_empty_.
bool log_empty_;

ColumnFamilyHandleImpl* persist_stats_cf_handle_;

bool persistent_stats_cfd_exists_ = true;

// Without two_write_queues, read and writes to alive_log_files_ are
// protected by mutex_. However since back() is never popped, and push_back()
Expand Down
4 changes: 2 additions & 2 deletions db/db_impl/db_impl_debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,8 @@ bool DBImpl::TEST_IsPersistentStatsEnabled() const {
return thread_persist_stats_ && thread_persist_stats_->IsRunning();
}

size_t DBImpl::TEST_EstiamteStatsHistorySize() const {
return EstiamteStatsHistorySize();
size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
return EstimateInMemoryStatsHistorySize();
}
} // namespace rocksdb
#endif // NDEBUG
Loading

0 comments on commit 671d15c

Please sign in to comment.