Skip to content

Commit

Permalink
Enable RocksDB to persist Options file.
Browse files Browse the repository at this point in the history
Summary:
This patch allows rocksdb to persist options into a file on
DB::Open, SetOptions, and Create / Drop ColumnFamily.
Options files are created under the same directory as the rocksdb
instance.

In addition, this patch also adds a fail_if_missing_options_file in DBOptions
that makes any function call return non-ok status when it is not able to
persist options properly.

  // If true, then DB::Open / CreateColumnFamily / DropColumnFamily
  // / SetOptions will fail if options file is not detected or properly
  // persisted.
  //
  // DEFAULT: false
  bool fail_if_missing_options_file;

Options file names are formatted as OPTIONS-<number>, and RocksDB
will always keep the latest two options files.

Test Plan:
Add options_file_test.

options_test
column_family_test

Reviewers: igor, IslamAbdelRahman, sdong, anthony

Reviewed By: anthony

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D48285
  • Loading branch information
yhchiang committed Nov 11, 2015
1 parent 7ed2c3e commit e114f0a
Show file tree
Hide file tree
Showing 18 changed files with 455 additions and 18 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ set(TESTS
db/memtable_list_test.cc
db/merge_test.cc
db/merge_helper_test.cc
db/options_file_test.cc
db/perf_context_test.cc
db/plain_table_db_test.cc
db/prefix_test.cc
Expand Down
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Introduce CreateLoggerFromOptions(), this function create a Logger for provided DBOptions.
* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families.
* Add MemoryUtil in rocksdb/utilities/memory.h. It currently offers a way to get the memory usage by type from a list rocksdb instances.
* RocksDB will now persist options under the same directory as the RocksDB database on successful DB::Open, CreateColumnFamily, DropColumnFamily, and SetOptions.
### Public API Changes
* CompactionFilter::Context includes information of Column Family ID
* The need-compaction hint given by TablePropertiesCollector::NeedCompact() will be persistent and recoverable after DB recovery. This introduces a breaking format change. If you use this experimental feature, including NewCompactOnDeletionCollectorFactory() in the new version, you may not be able to directly downgrade the DB back to version 4.0 or lower.
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ TESTS = \
memory_test \
merge_test \
merger_test \
options_file_test \
redis_test \
reduce_levels_test \
plain_table_db_test \
Expand Down Expand Up @@ -892,6 +893,9 @@ merge_test: db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS)
merger_test: table/merger_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

options_file_test: db/options_file_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

Expand Down
1 change: 1 addition & 0 deletions db/column_family_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class ColumnFamilyTest : public testing::Test {
env_ = new EnvCounter(Env::Default());
dbname_ = test::TmpDir() + "/column_family_test";
db_options_.create_if_missing = true;
db_options_.fail_if_options_file_error = true;
db_options_.env = env_;
DestroyDB(dbname_, Options(db_options_, column_family_options_));
}
Expand Down
170 changes: 169 additions & 1 deletion db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <algorithm>
#include <climits>
#include <cstdio>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
Expand Down Expand Up @@ -84,6 +85,8 @@
#include "util/log_buffer.h"
#include "util/logging.h"
#include "util/mutexlock.h"
#include "util/options_helper.h"
#include "util/options_parser.h"
#include "util/perf_context_imp.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
Expand Down Expand Up @@ -734,8 +737,12 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
// Also, SetCurrentFile creates a temp file when writing out new
// manifest, which is equal to state.pending_manifest_file_number. We
// should not delete that file
//
// TODO(yhchiang): carefully modify the third condition to safely
// remove the temp options files.
keep = (sst_live_map.find(number) != sst_live_map.end()) ||
(number == state.pending_manifest_file_number);
(number == state.pending_manifest_file_number) ||
(to_delete.find(kOptionsFileNamePrefix) != std::string::npos);
break;
case kInfoLogFile:
keep = true;
Expand All @@ -747,6 +754,7 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
case kDBLockFile:
case kIdentityFile:
case kMetaDatabase:
case kOptionsFile:
keep = true;
break;
}
Expand Down Expand Up @@ -1922,6 +1930,19 @@ Status DBImpl::SetOptions(ColumnFamilyHandle* column_family,
new_options = *cfd->GetLatestMutableCFOptions();
}
}
if (s.ok()) {
Status persist_options_status = WriteOptionsFile();
if (!persist_options_status.ok()) {
if (db_options_.fail_if_options_file_error) {
s = Status::IOError(
"SetOptions succeeded, but unable to persist options",
persist_options_status.ToString());
}
Warn(db_options_.info_log,
"Unable to persist options in SetOptions() -- %s",
persist_options_status.ToString().c_str());
}
}

Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
"SetOptions() on column family [%s], inputs:",
Expand Down Expand Up @@ -3458,6 +3479,18 @@ Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options,

// this is outside the mutex
if (s.ok()) {
Status persist_options_status = WriteOptionsFile();
if (!persist_options_status.ok()) {
if (db_options_.fail_if_options_file_error) {
s = Status::IOError(
"ColumnFamily has been created, but unable to persist"
"options in CreateColumnFamily()",
persist_options_status.ToString().c_str());
}
Warn(db_options_.info_log,
"Unable to persist options in CreateColumnFamily() -- %s",
persist_options_status.ToString().c_str());
}
NewThreadStatusCfInfo(
reinterpret_cast<ColumnFamilyHandleImpl*>(*handle)->cfd());
}
Expand Down Expand Up @@ -3515,6 +3548,18 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) {
auto* mutable_cf_options = cfd->GetLatestMutableCFOptions();
max_total_in_memory_state_ -= mutable_cf_options->write_buffer_size *
mutable_cf_options->max_write_buffer_number;
auto options_persist_status = WriteOptionsFile();
if (!options_persist_status.ok()) {
if (db_options_.fail_if_options_file_error) {
s = Status::IOError(
"ColumnFamily has been dropped, but unable to persist "
"options in DropColumnFamily()",
options_persist_status.ToString().c_str());
}
Warn(db_options_.info_log,
"Unable to persist options in DropColumnFamily() -- %s",
options_persist_status.ToString().c_str());
}
Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
"Dropped column family with id %u\n",
cfd->GetID());
Expand Down Expand Up @@ -4931,13 +4976,27 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
impl);
LogFlush(impl->db_options_.info_log);

auto persist_options_status = impl->WriteOptionsFile();
if (!persist_options_status.ok()) {
if (db_options.fail_if_options_file_error) {
s = Status::IOError(
"DB::Open() failed --- Unable to persist Options file",
persist_options_status.ToString());
}
Warn(impl->db_options_.info_log,
"Unable to persist options in DB::Open() -- %s",
persist_options_status.ToString().c_str());
}
}
if (s.ok()) {
*dbptr = impl;
} else {
for (auto* h : *handles) {
delete h;
}
handles->clear();
delete impl;
*dbptr = nullptr;
}
return s;
}
Expand Down Expand Up @@ -5034,6 +5093,7 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
}
}
}

// ignore case where no archival directory is present.
env->DeleteDir(archivedir);

Expand All @@ -5045,6 +5105,114 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
return result;
}

Status DBImpl::WriteOptionsFile() {
#ifndef ROCKSDB_LITE
std::string file_name;
Status s = WriteOptionsToTempFile(&file_name);
if (!s.ok()) {
return s;
}
s = RenameTempFileToOptionsFile(file_name);
return s;
#else
return Status::OK();
#endif // !ROCKSDB_LITE
}

Status DBImpl::WriteOptionsToTempFile(std::string* file_name) {
#ifndef ROCKSDB_LITE
std::vector<std::string> cf_names;
std::vector<ColumnFamilyOptions> cf_opts;
{
InstrumentedMutexLock l(&mutex_);
// This part requires mutex to protect the column family options
for (auto cfd : *versions_->GetColumnFamilySet()) {
if (cfd->IsDropped()) {
continue;
}
cf_names.push_back(cfd->GetName());
cf_opts.push_back(BuildColumnFamilyOptions(
*cfd->options(), *cfd->GetLatestMutableCFOptions()));
}
}
*file_name = TempOptionsFileName(GetName(), versions_->NewFileNumber());

Status s = PersistRocksDBOptions(GetDBOptions(), cf_names, cf_opts,
*file_name, GetEnv());
return s;
#else
return Status::OK();
#endif // !ROCKSDB_LITE
}

#ifndef ROCKSDB_LITE
namespace {
void DeleteOptionsFilesHelper(const std::map<uint64_t, std::string>& filenames,
const size_t num_files_to_keep,
const std::shared_ptr<Logger>& info_log,
Env* env) {
if (filenames.size() <= num_files_to_keep) {
return;
}
for (auto iter = std::next(filenames.begin(), num_files_to_keep);
iter != filenames.end(); ++iter) {
if (!env->DeleteFile(iter->second).ok()) {
Warn(info_log, "Unable to delete options file %s", iter->second.c_str());
}
}
}
} // namespace
#endif // !ROCKSDB_LITE

Status DBImpl::DeleteObsoleteOptionsFiles() {
#ifndef ROCKSDB_LITE
options_files_mutex_.AssertHeld();

std::vector<std::string> filenames;
// use ordered map to store keep the filenames sorted from the newest
// to the oldest.
std::map<uint64_t, std::string> options_filenames;
Status s;
s = GetEnv()->GetChildren(GetName(), &filenames);
if (!s.ok()) {
return s;
}
for (auto& filename : filenames) {
uint64_t file_number;
FileType type;
if (ParseFileName(filename, &file_number, &type) && type == kOptionsFile) {
options_filenames.insert(
{std::numeric_limits<uint64_t>::max() - file_number,
GetName() + "/" + filename});
}
}

// Keeps the latest 2 Options file
const size_t kNumOptionsFilesKept = 2;
DeleteOptionsFilesHelper(options_filenames, kNumOptionsFilesKept,
db_options_.info_log, GetEnv());
return Status::OK();
#else
return Status::OK();
#endif // !ROCKSDB_LITE
}

Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name) {
#ifndef ROCKSDB_LITE
InstrumentedMutexLock l(&options_files_mutex_);
Status s;
std::string options_file_name =
OptionsFileName(GetName(), versions_->NewFileNumber());
// Retry if the file name happen to conflict with an existing one.
s = GetEnv()->RenameFile(file_name, options_file_name);

DeleteObsoleteOptionsFiles();
return s;
#else
return Status::OK();
#endif // !ROCKSDB_LITE
}

#if ROCKSDB_USING_THREAD_STATUS

void DBImpl::NewThreadStatusCfInfo(
Expand Down
12 changes: 12 additions & 0 deletions db/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,13 @@ class DBImpl : public DB {
SuperVersion* super_version,
Arena* arena);

// The following options file related functions should not be
// called while DB mutex is held.
Status WriteOptionsFile();
Status WriteOptionsToTempFile(std::string* file_name);
Status RenameTempFileToOptionsFile(const std::string& file_name);
Status DeleteObsoleteOptionsFiles();

void NotifyOnFlushCompleted(ColumnFamilyData* cfd, FileMetaData* file_meta,
const MutableCFOptions& mutable_cf_options,
int job_id, TableProperties prop);
Expand Down Expand Up @@ -552,8 +559,13 @@ class DBImpl : public DB {
// Lock over the persistent DB state. Non-nullptr iff successfully acquired.
FileLock* db_lock_;

// The mutex for options file related operations.
// NOTE: should never acquire options_file_mutex_ and mutex_ at the
// same time.
InstrumentedMutex options_files_mutex_;
// State below is protected by mutex_
InstrumentedMutex mutex_;

std::atomic<bool> shutting_down_;
// This condition variable is signaled on these conditions:
// * whenever bg_compaction_scheduled_ goes down to 0
Expand Down
1 change: 1 addition & 0 deletions db/db_test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ Options DBTestBase::CurrentOptions(
}
options.env = env_;
options.create_if_missing = true;
options.fail_if_options_file_error = true;
return options;
}

Expand Down
Loading

0 comments on commit e114f0a

Please sign in to comment.