Skip to content

Commit

Permalink
Allow DB resume after background errors (facebook#3997)
Browse files Browse the repository at this point in the history
Summary:
Currently, if RocksDB encounters errors during a write operation (user requested or BG operations), it sets DBImpl::bg_error_ and fails subsequent writes. This PR allows the DB to be resumed for certain classes of errors. It consists of 3 parts -
1. Introduce Status::Severity in rocksdb::Status to indicate whether a given error can be recovered from or not
2. Refactor the error handling code so that setting bg_error_ and deciding on severity is in one place
3. Provide an API for the user to clear the error and resume the DB instance

This whole change is broken up into multiple PRs. Initially, we only allow clearing the error for Status::NoSpace() errors during background flush/compaction. Subsequent PRs will expand this to include more errors and foreground operations such as Put(), and implement a polling mechanism for out-of-space errors.
Closes facebook#3997

Differential Revision: D8653831

Pulled By: anand1976

fbshipit-source-id: 6dc835c76122443a7668497c0226b4f072bc6afd
  • Loading branch information
Anand Ananthabhotla authored and facebook-github-bot committed Jun 28, 2018
1 parent 26d67e3 commit 52d4c9b
Show file tree
Hide file tree
Showing 23 changed files with 531 additions and 128 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ set(SOURCES
db/db_info_dumper.cc
db/db_iter.cc
db/dbformat.cc
db/error_handler.cc
db/event_helpers.cc
db/experimental.cc
db/external_sst_file_ingestion_job.cc
Expand Down Expand Up @@ -877,6 +878,7 @@ if(WITH_TESTS)
db/db_write_test.cc
db/dbformat_test.cc
db/deletefile_test.cc
db/error_handler_test.cc
db/obsolete_files_test.cc
db/external_sst_file_basic_test.cc
db/external_sst_file_test.cc
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ TESTS = \
db_table_properties_test \
db_statistics_test \
db_write_test \
error_handler_test \
autovector_test \
blob_db_test \
cleanable_test \
Expand Down Expand Up @@ -1194,6 +1195,9 @@ db_statistics_test: db/db_statistics_test.o db/db_test_util.o $(LIBOBJECTS) $(TE
db_write_test: db/db_write_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

error_handler_test: db/error_handler_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

external_sst_file_basic_test: db/external_sst_file_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

Expand Down
1 change: 1 addition & 0 deletions TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ cpp_library(
"db/db_info_dumper.cc",
"db/db_iter.cc",
"db/dbformat.cc",
"db/error_handler.cc",
"db/event_helpers.cc",
"db/experimental.cc",
"db/external_sst_file_ingestion_job.cc",
Expand Down
19 changes: 6 additions & 13 deletions db/compaction_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
#include <vector>

#include "db/builder.h"
#include "db/db_impl.h"
#include "db/db_iter.h"
#include "db/dbformat.h"
#include "db/error_handler.h"
#include "db/event_helpers.h"
#include "db/log_reader.h"
#include "db/log_writer.h"
Expand Down Expand Up @@ -307,7 +309,7 @@ CompactionJob::CompactionJob(
const std::atomic<bool>* shutting_down,
const SequenceNumber preserve_deletes_seqnum, LogBuffer* log_buffer,
Directory* db_directory, Directory* output_directory, Statistics* stats,
InstrumentedMutex* db_mutex, Status* db_bg_error,
InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
std::vector<SequenceNumber> existing_snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker, std::shared_ptr<Cache> table_cache,
Expand All @@ -331,7 +333,7 @@ CompactionJob::CompactionJob(
output_directory_(output_directory),
stats_(stats),
db_mutex_(db_mutex),
db_bg_error_(db_bg_error),
db_error_handler_(db_error_handler),
existing_snapshots_(std::move(existing_snapshots)),
earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
snapshot_checker_(snapshot_checker),
Expand Down Expand Up @@ -1282,21 +1284,12 @@ Status CompactionJob::FinishCompactionOutputFile(
if (sfm->IsMaxAllowedSpaceReached()) {
// TODO(ajkr): should we return OK() if max space was reached by the final
// compaction output file (similarly to how flush works when full)?
s = Status::NoSpace("Max allowed space was reached");
s = Status::SpaceLimit("Max allowed space was reached");
TEST_SYNC_POINT(
"CompactionJob::FinishCompactionOutputFile:"
"MaxAllowedSpaceReached");
InstrumentedMutexLock l(db_mutex_);
if (db_bg_error_->ok()) {
Status new_bg_error = s;
// may temporarily unlock and lock the mutex.
EventHelpers::NotifyOnBackgroundError(
cfd->ioptions()->listeners, BackgroundErrorReason::kCompaction,
&new_bg_error, db_mutex_);
if (!new_bg_error.ok()) {
*db_bg_error_ = new_bg_error;
}
}
db_error_handler_->SetBGError(s, BackgroundErrorReason::kCompaction);
}
}
#endif
Expand Down
5 changes: 3 additions & 2 deletions db/compaction_job.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
namespace rocksdb {

class Arena;
class ErrorHandler;
class MemTable;
class SnapshotChecker;
class TableCache;
Expand All @@ -64,7 +65,7 @@ class CompactionJob {
LogBuffer* log_buffer,
Directory* db_directory, Directory* output_directory,
Statistics* stats, InstrumentedMutex* db_mutex,
Status* db_bg_error,
ErrorHandler* db_error_handler,
std::vector<SequenceNumber> existing_snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker,
Expand Down Expand Up @@ -145,7 +146,7 @@ class CompactionJob {
Directory* output_directory_;
Statistics* stats_;
InstrumentedMutex* db_mutex_;
Status* db_bg_error_;
ErrorHandler* db_error_handler_;
// If there were two snapshots with seq numbers s1 and
// s2 and s1 < s2, and if we find two instances of a key k1 then lies
// entirely within s1 and s2, then the earlier version of k1 can be safely
Expand Down
19 changes: 10 additions & 9 deletions db/compaction_job_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "db/column_family.h"
#include "db/compaction_job.h"
#include "db/error_handler.h"
#include "db/version_set.h"
#include "rocksdb/cache.h"
#include "rocksdb/db.h"
Expand Down Expand Up @@ -77,7 +78,8 @@ class CompactionJobTest : public testing::Test {
&write_controller_)),
shutting_down_(false),
preserve_deletes_seqnum_(0),
mock_table_factory_(new mock::MockTableFactory()) {
mock_table_factory_(new mock::MockTableFactory()),
error_handler_(db_options_, &mutex_) {
EXPECT_OK(env_->CreateDirIfMissing(dbname_));
db_options_.db_paths.emplace_back(dbname_,
std::numeric_limits<uint64_t>::max());
Expand Down Expand Up @@ -255,13 +257,12 @@ class CompactionJobTest : public testing::Test {
EventLogger event_logger(db_options_.info_log.get());
// TODO(yiwu) add a mock snapshot checker and add test for it.
SnapshotChecker* snapshot_checker = nullptr;
CompactionJob compaction_job(0, &compaction, db_options_, env_options_,
versions_.get(), &shutting_down_,
preserve_deletes_seqnum_, &log_buffer,
nullptr, nullptr, nullptr, &mutex_, &bg_error_,
snapshots, earliest_write_conflict_snapshot,
snapshot_checker, table_cache_, &event_logger,
false, false, dbname_, &compaction_job_stats_);
CompactionJob compaction_job(
0, &compaction, db_options_, env_options_, versions_.get(),
&shutting_down_, preserve_deletes_seqnum_, &log_buffer, nullptr,
nullptr, nullptr, &mutex_, &error_handler_, snapshots,
earliest_write_conflict_snapshot, snapshot_checker, table_cache_,
&event_logger, false, false, dbname_, &compaction_job_stats_);
VerifyInitializationOfCompactionJobStats(compaction_job_stats_);

compaction_job.Prepare();
Expand Down Expand Up @@ -303,7 +304,7 @@ class CompactionJobTest : public testing::Test {
ColumnFamilyData* cfd_;
std::unique_ptr<CompactionFilter> compaction_filter_;
std::shared_ptr<MergeOperator> merge_op_;
Status bg_error_;
ErrorHandler error_handler_;
};

TEST_F(CompactionJobTest, Simple) {
Expand Down
45 changes: 42 additions & 3 deletions db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "db/db_info_dumper.h"
#include "db/db_iter.h"
#include "db/dbformat.h"
#include "db/error_handler.h"
#include "db/event_helpers.h"
#include "db/external_sst_file_ingestion_job.h"
#include "db/flush_job.h"
Expand Down Expand Up @@ -215,7 +216,8 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
// as well.
use_custom_gc_(seq_per_batch),
preserve_deletes_(options.preserve_deletes),
closed_(false) {
closed_(false),
error_handler_(immutable_db_options_, &mutex_) {
env_->GetAbsolutePath(dbname, &db_absolute_path_);

// Reserve ten files or so for other uses and give the rest to TableCache.
Expand Down Expand Up @@ -244,6 +246,43 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
preserve_deletes_seqnum_.store(0);
}

Status DBImpl::Resume() {
ROCKS_LOG_INFO(immutable_db_options_.info_log, "Resuming DB");

InstrumentedMutexLock db_mutex(&mutex_);

if (!error_handler_.IsDBStopped() && !error_handler_.IsBGWorkStopped()) {
// Nothing to do
return Status::OK();
}

Status s = error_handler_.GetBGError();
if (s.severity() > Status::Severity::kHardError) {
ROCKS_LOG_INFO(immutable_db_options_.info_log,
"DB resume requested but failed due to Fatal/Unrecoverable error");
return s;
}

JobContext job_context(0);
FindObsoleteFiles(&job_context, true);
error_handler_.ClearBGError();
mutex_.Unlock();

job_context.manifest_file_number = 1;
if (job_context.HaveSomethingToDelete()) {
PurgeObsoleteFiles(job_context);
}
job_context.Clean();

ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB");
mutex_.Lock();
MaybeScheduleFlushOrCompaction();

// No need to check BGError again. If something happened, event listener would be
// notified and the operation causing it would have failed
return Status::OK();
}

// Will lock the mutex_, will wait for completion if wait is true
void DBImpl::CancelAllBackgroundWork(bool wait) {
InstrumentedMutexLock l(&mutex_);
Expand Down Expand Up @@ -2879,9 +2918,9 @@ Status DBImpl::IngestExternalFile(
std::list<uint64_t>::iterator pending_output_elem;
{
InstrumentedMutexLock l(&mutex_);
if (!bg_error_.ok()) {
if (error_handler_.IsDBStopped()) {
// Don't ingest files when there is a bg_error
return bg_error_;
return error_handler_.GetBGError();
}

// Make sure that bg cleanup wont delete the files that we are ingesting
Expand Down
10 changes: 7 additions & 3 deletions db/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include "db/compaction_job.h"
#include "db/dbformat.h"
#include "db/external_sst_file_ingestion_job.h"
#include "db/error_handler.h"
#include "db/event_helpers.h"
#include "db/flush_job.h"
#include "db/flush_scheduler.h"
#include "db/internal_stats.h"
Expand Down Expand Up @@ -73,6 +75,9 @@ class DBImpl : public DB {
const bool seq_per_batch = false);
virtual ~DBImpl();

using DB::Resume;
virtual Status Resume() override;

// Implementations of the DB interface
using DB::Put;
virtual Status Put(const WriteOptions& options,
Expand Down Expand Up @@ -1265,9 +1270,6 @@ class DBImpl : public DB {
PrepickedCompaction* prepicked_compaction;
};

// Have we encountered a background error in paranoid mode?
Status bg_error_;

// shall we disable deletion of obsolete files
// if 0 the deletion is enabled.
// if non-zero, files will not be getting deleted
Expand Down Expand Up @@ -1425,6 +1427,8 @@ class DBImpl : public DB {

// Flag to check whether Close() has been called on this DB
bool closed_;

ErrorHandler error_handler_;
};

extern Options SanitizeOptions(const std::string& db,
Expand Down
Loading

0 comments on commit 52d4c9b

Please sign in to comment.