Skip to content

Commit

Permalink
Pass IOStatus to write path and set retryable IO Error as hard error …
Browse files Browse the repository at this point in the history
…in BG jobs (facebook#6487)

Summary:
In the current code base, we use Status to get and store the returned status from the call. Specifically, for IO related functions, the current Status cannot reflect the IO Error details such as error scope, error retryable attribute, and others. With the implementation of facebook#5761, we have the new Wrapper for IO, which returns IOStatus instead of Status. However, the IOStatus is purged at the lower level of write path and transferred to Status.

The first job of this PR is to pass the IOStatus to the write path (flush, WAL write, and Compaction). The second job is to identify the Retryable IO Error as HardError, and set the bg_error_ as HardError. In this case, the DB Instance becomes read only. User is informed of the Status and need to take actions to deal with it (e.g., call db->Resume()).
Pull Request resolved: facebook#6487

Test Plan: Added the testing case to error_handler_fs_test. Pass make asan_check

Reviewed By: anand1976

Differential Revision: D20685017

Pulled By: zhichao-cao

fbshipit-source-id: ff85f042896243abcd6ef37877834e26f36b6eb0
  • Loading branch information
zhichao-cao authored and facebook-github-bot committed Mar 27, 2020
1 parent 2e27697 commit 4246888
Show file tree
Hide file tree
Showing 44 changed files with 881 additions and 317 deletions.
22 changes: 15 additions & 7 deletions db/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@ Status BuildTable(
SnapshotChecker* snapshot_checker, const CompressionType compression,
uint64_t sample_for_compression, const CompressionOptions& compression_opts,
bool paranoid_file_checks, InternalStats* internal_stats,
TableFileCreationReason reason, EventLogger* event_logger, int job_id,
const Env::IOPriority io_priority, TableProperties* table_properties,
int level, const uint64_t creation_time, const uint64_t oldest_key_time,
Env::WriteLifeTimeHint write_hint, const uint64_t file_creation_time) {
TableFileCreationReason reason, IOStatus* io_status,
EventLogger* event_logger, int job_id, const Env::IOPriority io_priority,
TableProperties* table_properties, int level, const uint64_t creation_time,
const uint64_t oldest_key_time, Env::WriteLifeTimeHint write_hint,
const uint64_t file_creation_time) {
assert((column_family_id ==
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
column_family_name.empty());
Expand Down Expand Up @@ -185,11 +186,13 @@ Status BuildTable(
tp = builder->GetTableProperties();
bool empty = builder->NumEntries() == 0 && tp.num_range_deletions == 0;
s = c_iter.status();
TEST_SYNC_POINT("BuildTable:BeforeFinishBuildTable");
if (!s.ok() || empty) {
builder->Abandon();
} else {
s = builder->Finish();
}
*io_status = builder->io_status();

if (s.ok() && !empty) {
uint64_t file_size = builder->FileSize();
Expand All @@ -209,11 +212,16 @@ Status BuildTable(
// Finish and check for file errors
if (s.ok() && !empty) {
StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
s = file_writer->Sync(ioptions.use_fsync);
*io_status = file_writer->Sync(ioptions.use_fsync);
}
if (s.ok() && !empty) {
s = file_writer->Close();
if (io_status->ok() && !empty) {
*io_status = file_writer->Close();
}
if (!io_status->ok()) {
s = *io_status;
}

// TODO Also check the IO status when create the Iterator.

if (s.ok() && !empty) {
// Verify that the table is usable
Expand Down
2 changes: 1 addition & 1 deletion db/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ extern Status BuildTable(
const uint64_t sample_for_compression,
const CompressionOptions& compression_opts, bool paranoid_file_checks,
InternalStats* internal_stats, TableFileCreationReason reason,
EventLogger* event_logger = nullptr, int job_id = 0,
IOStatus* io_status, EventLogger* event_logger = nullptr, int job_id = 0,
const Env::IOPriority io_priority = Env::IO_HIGH,
TableProperties* table_properties = nullptr, int level = -1,
const uint64_t creation_time = 0, const uint64_t oldest_key_time = 0,
Expand Down
26 changes: 22 additions & 4 deletions db/compaction/compaction_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -613,8 +613,13 @@ Status CompactionJob::Run() {
}
}

IOStatus io_s;
if (status.ok() && output_directory_) {
status = output_directory_->Fsync(IOOptions(), nullptr);
io_s = output_directory_->Fsync(IOOptions(), nullptr);
}
if (!io_s.ok()) {
io_status_ = io_s;
status = io_s;
}

if (status.ok()) {
Expand Down Expand Up @@ -713,9 +718,13 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
cfd->internal_stats()->AddCompactionStats(
compact_->compaction->output_level(), thread_pri_, compaction_stats_);

versions_->SetIOStatusOK();
if (status.ok()) {
status = InstallCompactionResults(mutable_cf_options);
}
if (!versions_->io_status().ok()) {
io_status_ = versions_->io_status();
}
VersionStorageInfo::LevelSummaryStorage tmp;
auto vstorage = cfd->current()->storage_info();
const auto& stats = compaction_stats_;
Expand Down Expand Up @@ -1294,6 +1303,10 @@ Status CompactionJob::FinishCompactionOutputFile(
} else {
sub_compact->builder->Abandon();
}
if (!sub_compact->builder->io_status().ok()) {
io_status_ = sub_compact->builder->io_status();
s = io_status_;
}
const uint64_t current_bytes = sub_compact->builder->FileSize();
if (s.ok()) {
// Add the checksum information to file metadata.
Expand All @@ -1307,12 +1320,17 @@ Status CompactionJob::FinishCompactionOutputFile(
sub_compact->total_bytes += current_bytes;

// Finish and check for file errors
IOStatus io_s;
if (s.ok()) {
StopWatch sw(env_, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
s = sub_compact->outfile->Sync(db_options_.use_fsync);
io_s = sub_compact->outfile->Sync(db_options_.use_fsync);
}
if (s.ok()) {
s = sub_compact->outfile->Close();
if (io_s.ok()) {
io_s = sub_compact->outfile->Close();
}
if (!io_s.ok()) {
io_status_ = io_s;
s = io_s;
}
sub_compact->outfile.reset();

Expand Down
4 changes: 4 additions & 0 deletions db/compaction/compaction_job.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ class CompactionJob {
// Add compaction input/output to the current version
Status Install(const MutableCFOptions& mutable_cf_options);

// Return the IO status
IOStatus io_status() const { return io_status_; }

private:
struct SubcompactionState;

Expand Down Expand Up @@ -193,6 +196,7 @@ class CompactionJob {
std::vector<uint64_t> sizes_;
Env::WriteLifeTimeHint write_hint_;
Env::Priority thread_pri_;
IOStatus io_status_;
};

} // namespace ROCKSDB_NAMESPACE
2 changes: 1 addition & 1 deletion db/compaction/compaction_job_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ class CompactionJobTest : public testing::Test {
}
ASSERT_OK(s);
// Make "CURRENT" file that points to the new manifest file.
s = SetCurrentFile(env_, dbname_, 1, nullptr);
s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);

std::vector<ColumnFamilyDescriptor> column_families;
cf_options_.table_factory = mock_table_factory_;
Expand Down
34 changes: 24 additions & 10 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,11 @@ Status DBImpl::ResumeImpl() {
s = bg_error;
}

// Make sure the IO Status stored in version set is set to OK.
if(s.ok()) {
versions_->SetIOStatusOK();
}

// We cannot guarantee consistency of the WAL. So force flush Memtables of
// all the column families
if (s.ok()) {
Expand Down Expand Up @@ -1146,25 +1151,25 @@ int DBImpl::FindMinimumEmptyLevelFitting(

Status DBImpl::FlushWAL(bool sync) {
if (manual_wal_flush_) {
Status s;
IOStatus io_s;
{
// We need to lock log_write_mutex_ since logs_ might change concurrently
InstrumentedMutexLock wl(&log_write_mutex_);
log::Writer* cur_log_writer = logs_.back().writer;
s = cur_log_writer->WriteBuffer();
io_s = cur_log_writer->WriteBuffer();
}
if (!s.ok()) {
if (!io_s.ok()) {
ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s",
s.ToString().c_str());
io_s.ToString().c_str());
// In case there is a fs error we should set it globally to prevent the
// future writes
WriteStatusCheck(s);
IOStatusCheck(io_s);
// whether sync or not, we should abort the rest of function upon error
return s;
return std::move(io_s);
}
if (!sync) {
ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "FlushWAL sync=false");
return s;
return std::move(io_s);
}
}
if (!sync) {
Expand Down Expand Up @@ -1216,12 +1221,21 @@ Status DBImpl::SyncWAL() {
TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:1");
RecordTick(stats_, WAL_FILE_SYNCED);
Status status;
IOStatus io_s;
for (log::Writer* log : logs_to_sync) {
status = log->file()->SyncWithoutFlush(immutable_db_options_.use_fsync);
if (!status.ok()) {
io_s = log->file()->SyncWithoutFlush(immutable_db_options_.use_fsync);
if (!io_s.ok()) {
status = io_s;
break;
}
}
if (!io_s.ok()) {
ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL Sync error %s",
io_s.ToString().c_str());
// In case there is a fs error we should set it globally to prevent the
// future writes
IOStatusCheck(io_s);
}
if (status.ok() && need_log_dir_sync) {
status = directories_.GetWalDir()->Fsync(IOOptions(), nullptr);
}
Expand All @@ -1248,7 +1262,7 @@ Status DBImpl::LockWAL() {
// future writes
WriteStatusCheck(status);
}
return status;
return std::move(status);
}

Status DBImpl::UnlockWAL() {
Expand Down
24 changes: 14 additions & 10 deletions db/db_impl/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1364,7 +1364,7 @@ class DBImpl : public DB {
void ReleaseFileNumberFromPendingOutputs(
std::unique_ptr<std::list<uint64_t>::iterator>& v);

Status SyncClosedLogs(JobContext* job_context);
IOStatus SyncClosedLogs(JobContext* job_context);

// Flush the in-memory write buffer to storage. Switches to a new
// log-file/memtable and writes a new descriptor iff successful. Then
Expand Down Expand Up @@ -1501,21 +1501,25 @@ class DBImpl : public DB {
WriteBatch* tmp_batch, size_t* write_with_wal,
WriteBatch** to_be_cached_state);

Status WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer,
uint64_t* log_used, uint64_t* log_size);
IOStatus WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer,
uint64_t* log_used, uint64_t* log_size);

Status WriteToWAL(const WriteThread::WriteGroup& write_group,
log::Writer* log_writer, uint64_t* log_used,
bool need_log_sync, bool need_log_dir_sync,
SequenceNumber sequence);
IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group,
log::Writer* log_writer, uint64_t* log_used,
bool need_log_sync, bool need_log_dir_sync,
SequenceNumber sequence);

Status ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group,
uint64_t* log_used, SequenceNumber* last_sequence,
size_t seq_inc);
IOStatus ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group,
uint64_t* log_used,
SequenceNumber* last_sequence, size_t seq_inc);

// Used by WriteImpl to update bg_error_ if paranoid check is enabled.
void WriteStatusCheck(const Status& status);

// Used by WriteImpl to update bg_error_ when IO error happens, e.g., write
// WAL, sync WAL fails, if paranoid check is enabled.
void IOStatusCheck(const IOStatus& status);

// Used by WriteImpl to update bg_error_ in case of memtable insert error.
void MemTableInsertStatusCheck(const Status& memtable_insert_status);

Expand Down
Loading

0 comments on commit 4246888

Please sign in to comment.