Skip to content

Commit

Permalink
Enable unsynced data loss in crash test (facebook#9947)
Browse files Browse the repository at this point in the history
Summary:
`db_stress` already tracks expected state history to verify prefix-recoverability when `sync_fault_injection` is enabled. This PR enables `sync_fault_injection` in `db_crashtest.py`.

Previously enabling `sync_fault_injection` would cause whole unsynced files to be dropped. This PR adds a more interesting case of losing only the tail of unsynced data by implementing `TestFSWritableFile::RangeSync()` and enabling `{wal_,}bytes_per_sync`.

Pull Request resolved: facebook#9947

Test Plan:
- regular blackbox, blackbox --simple
- various commands to stress this new case, such as `TEST_TMPDIR=/dev/shm python3 tools/db_crashtest.py blackbox --max_key=100000 --write_buffer_size=2097152 --avoid_flush_during_recovery=1 --disable_wal=0 --interval=10 --db_write_buffer_size=0 --sync_fault_injection=1 --wal_compression=none --delpercent=0 --delrangepercent=0 --prefixpercent=0 --iterpercent=0 --writepercent=100 --readpercent=0 --wal_bytes_per_sync=131072 --duration=36000 --sync=0 --open_write_fault_one_in=16`

Reviewed By: riversand963

Differential Revision: D36152775

Pulled By: ajkr

fbshipit-source-id: 44b68a7fad0a4cf74af9fe1f39be01baab8141d8
  • Loading branch information
ajkr authored and facebook-github-bot committed May 5, 2022
1 parent 49628c9 commit a62506a
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 1 deletion.
2 changes: 2 additions & 0 deletions db_stress_tool/db_stress_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ DECLARE_bool(mock_direct_io);
DECLARE_bool(statistics);
DECLARE_bool(sync);
DECLARE_bool(use_fsync);
DECLARE_uint64(bytes_per_sync);
DECLARE_uint64(wal_bytes_per_sync);
DECLARE_int32(kill_random_test);
DECLARE_string(kill_exclude_prefixes);
DECLARE_bool(disable_wal);
Expand Down
9 changes: 9 additions & 0 deletions db_stress_tool/db_stress_gflags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,15 @@ DEFINE_bool(sync, false, "Sync all writes to disk");

DEFINE_bool(use_fsync, false, "If true, issue fsync instead of fdatasync");

DEFINE_uint64(bytes_per_sync, ROCKSDB_NAMESPACE::Options().bytes_per_sync,
"If nonzero, sync SST file data incrementally after every "
"`bytes_per_sync` bytes are written");

DEFINE_uint64(wal_bytes_per_sync,
ROCKSDB_NAMESPACE::Options().wal_bytes_per_sync,
"If nonzero, sync WAL file data incrementally after every "
"`bytes_per_sync` bytes are written");

DEFINE_int32(kill_random_test, 0,
"If non-zero, kill at various points in source code with "
"probability 1/this");
Expand Down
2 changes: 2 additions & 0 deletions db_stress_tool/db_stress_test_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2368,6 +2368,8 @@ void StressTest::Open(SharedState* shared) {
options_.statistics = dbstats;
options_.env = db_stress_env;
options_.use_fsync = FLAGS_use_fsync;
options_.bytes_per_sync = FLAGS_bytes_per_sync;
options_.wal_bytes_per_sync = FLAGS_wal_bytes_per_sync;
options_.compaction_readahead_size = FLAGS_compaction_readahead_size;
options_.allow_mmap_reads = FLAGS_mmap_read;
options_.allow_mmap_writes = FLAGS_mmap_write;
Expand Down
4 changes: 3 additions & 1 deletion tools/db_crashtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@
# Sync mode might make test runs slower so running it in a smaller chance
"sync" : lambda : random.choice(
[1 if t == 0 else 0 for t in range(0, 20)]),
"bytes_per_sync": lambda: random.choice([0, 262144]),
"wal_bytes_per_sync": lambda: random.choice([0, 524288]),
# Disable compaction_readahead_size because the test is not passing.
#"compaction_readahead_size" : lambda : random.choice(
# [0, 0, 1024 * 1024]),
Expand All @@ -153,7 +155,7 @@
"open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]),
"open_write_fault_one_in": lambda: random.choice([0, 0, 16]),
"open_read_fault_one_in": lambda: random.choice([0, 0, 32]),
"sync_fault_injection": False,
"sync_fault_injection": lambda: random.randint(0, 1),
"get_property_one_in": 1000000,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
"max_write_buffer_size_to_maintain": lambda: random.choice(
Expand Down
28 changes: 28 additions & 0 deletions utilities/fault_injection_fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "utilities/fault_injection_fs.h"

#include <algorithm>
#include <functional>
#include <utility>

Expand Down Expand Up @@ -290,6 +291,33 @@ IOStatus TestFSWritableFile::Sync(const IOOptions& options,
return io_s;
}

IOStatus TestFSWritableFile::RangeSync(uint64_t offset, uint64_t nbytes,
const IOOptions& options,
IODebugContext* dbg) {
if (!fs_->IsFilesystemActive()) {
return fs_->GetError();
}
// Assumes caller passes consecutive byte ranges.
uint64_t sync_limit = offset + nbytes;
uint64_t buf_begin =
state_.pos_at_last_sync_ < 0 ? 0 : state_.pos_at_last_sync_;

IOStatus io_s;
if (sync_limit < buf_begin) {
return io_s;
}
uint64_t num_to_sync = std::min(static_cast<uint64_t>(state_.buffer_.size()),
sync_limit - buf_begin);
Slice buf_to_sync(state_.buffer_.data(), num_to_sync);
io_s = target_->Append(buf_to_sync, options, dbg);
state_.buffer_ = state_.buffer_.substr(num_to_sync);
// Ignore sync errors
target_->RangeSync(offset, nbytes, options, dbg).PermitUncheckedError();
state_.pos_at_last_sync_ = offset + num_to_sync;
fs_->WritableFileSynced(state_);
return io_s;
}

TestFSRandomRWFile::TestFSRandomRWFile(const std::string& /*fname*/,
std::unique_ptr<FSRandomRWFile>&& f,
FaultInjectionTestFS* fs)
Expand Down
3 changes: 3 additions & 0 deletions utilities/fault_injection_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class TestFSWritableFile : public FSWritableFile {
IODebugContext* dbg) override;
virtual IOStatus Flush(const IOOptions&, IODebugContext*) override;
virtual IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
virtual IOStatus RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/,
const IOOptions& options,
IODebugContext* dbg) override;
virtual bool IsSyncThreadSafe() const override { return true; }
virtual IOStatus PositionedAppend(const Slice& data, uint64_t offset,
const IOOptions& options,
Expand Down

0 comments on commit a62506a

Please sign in to comment.