Skip to content

Commit 9e7b7e2

Browse files
anand76 authored and facebook-github-bot committed
Silence false alarms in db_stress fault injection (facebook#6741)
Summary: False alarms are caused by codepaths that intentionally swallow IO errors.
Tests: make crash_test
Pull Request resolved: facebook#6741
Reviewed By: ltamasi
Differential Revision: D21181138
Pulled By: anand1976
fbshipit-source-id: 5ccfbc68eb192033488de6269e59c00f2c65ce00
1 parent e04f3bc commit 9e7b7e2

12 files changed

+73
-40
lines changed

db_stress_tool/db_stress_shared_state.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ const uint32_t SharedState::UNKNOWN_SENTINEL = 0xfffffffe;
1616
const uint32_t SharedState::DELETION_SENTINEL = 0xffffffff;
1717
#if defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
1818
#if defined(OS_SOLARIS)
19-
__thread bool SharedState::filter_read_error;
19+
__thread bool SharedState::ignore_read_error;
2020
#else
21-
thread_local bool SharedState::filter_read_error;
21+
thread_local bool SharedState::ignore_read_error;
2222
#endif // OS_SOLARIS
2323
#else
24-
bool SharedState::filter_read_error;
24+
bool SharedState::ignore_read_error;
2525
#endif // ROCKSDB_SUPPORT_THREAD_LOCAL
2626
} // namespace ROCKSDB_NAMESPACE
2727
#endif // GFLAGS

db_stress_tool/db_stress_shared_state.h

+7-7
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,12 @@ class SharedState {
4848
// for those calls
4949
#if defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
5050
#if defined(OS_SOLARIS)
51-
static __thread bool filter_read_error;
51+
static __thread bool ignore_read_error;
5252
#else
53-
static thread_local bool filter_read_error;
53+
static thread_local bool ignore_read_error;
5454
#endif // OS_SOLARIS
5555
#else
56-
static bool filter_read_error;
56+
static bool ignore_read_error;
5757
#endif // ROCKSDB_SUPPORT_THREAD_LOCAL
5858

5959
SharedState(Env* env, StressTest* stress_test)
@@ -192,8 +192,8 @@ class SharedState {
192192
}
193193
#ifndef NDEBUG
194194
if (FLAGS_read_fault_one_in) {
195-
SyncPoint::GetInstance()->SetCallBack("FilterReadError",
196-
FilterReadErrorCallback);
195+
SyncPoint::GetInstance()->SetCallBack("FaultInjectionIgnoreError",
196+
IgnoreReadErrorCallback);
197197
SyncPoint::GetInstance()->EnableProcessing();
198198
}
199199
#endif // NDEBUG
@@ -362,8 +362,8 @@ class SharedState {
362362
}
363363

364364
private:
365-
static void FilterReadErrorCallback(void*) {
366-
filter_read_error = true;
365+
static void IgnoreReadErrorCallback(void*) {
366+
ignore_read_error = true;
367367
}
368368

369369
port::Mutex mu_;

db_stress_tool/no_batched_ops_stress.cc

+4-4
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ class NonBatchedOpsStressTest : public StressTest {
152152
#ifndef NDEBUG
153153
if (fault_fs_guard) {
154154
fault_fs_guard->EnableErrorInjection();
155-
SharedState::filter_read_error = false;
155+
SharedState::ignore_read_error = false;
156156
}
157157
#endif // NDEBUG
158158
Status s = db_->Get(read_opts, cfh, key, &from_db);
@@ -164,7 +164,7 @@ class NonBatchedOpsStressTest : public StressTest {
164164
if (s.ok()) {
165165
#ifndef NDEBUG
166166
if (fault_fs_guard) {
167-
if (error_count && !SharedState::filter_read_error) {
167+
if (error_count && !SharedState::ignore_read_error) {
168168
// Grab mutex so multiple thread don't try to print the
169169
// stack trace at the same time
170170
MutexLock l(thread->shared->GetMutex());
@@ -272,7 +272,7 @@ class NonBatchedOpsStressTest : public StressTest {
272272
#ifndef NDEBUG
273273
if (fault_fs_guard) {
274274
fault_fs_guard->EnableErrorInjection();
275-
SharedState::filter_read_error = false;
275+
SharedState::ignore_read_error = false;
276276
}
277277
#endif // NDEBUG
278278
db_->MultiGet(read_opts, cfh, num_keys, keys.data(), values.data(),
@@ -291,7 +291,7 @@ class NonBatchedOpsStressTest : public StressTest {
291291
}
292292

293293
#ifndef NDEBUG
294-
if (fault_fs_guard && error_count && !SharedState::filter_read_error) {
294+
if (fault_fs_guard && error_count && !SharedState::ignore_read_error) {
295295
int stat_nok = 0;
296296
for (const auto& s : statuses) {
297297
if (!s.ok() && !s.IsNotFound()) {

file/file_prefetch_buffer.cc

+10-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "monitoring/iostats_context_imp.h"
1818
#include "port/port.h"
1919
#include "test_util/sync_point.h"
20+
#include "test_util/testharness.h"
2021
#include "util/random.h"
2122
#include "util/rate_limiter.h"
2223

@@ -86,9 +87,17 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
8687
}
8788

8889
Slice result;
90+
size_t read_len = static_cast<size_t>(roundup_len - chunk_len);
8991
s = reader->Read(rounddown_offset + chunk_len,
90-
static_cast<size_t>(roundup_len - chunk_len), &result,
92+
read_len, &result,
9193
buffer_.BufferStart() + chunk_len, nullptr, for_compaction);
94+
#ifndef NDEBUG
95+
if (!s.ok() || result.size() < read_len) {
96+
// Fake an IO error to force db_stress fault injection to ignore
97+
// truncated read errors
98+
IGNORE_STATUS_IF_ERROR(Status::IOError());
99+
}
100+
#endif
92101
if (s.ok()) {
93102
buffer_offset_ = rounddown_offset;
94103
buffer_.Size(static_cast<size_t>(chunk_len) + result.size());

table/block_based/block_based_filter_block.cc

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "monitoring/perf_context_imp.h"
1515
#include "rocksdb/filter_policy.h"
1616
#include "table/block_based/block_based_table_reader.h"
17+
#include "test_util/testharness.h"
1718
#include "util/coding.h"
1819
#include "util/string_util.h"
1920

@@ -184,6 +185,7 @@ std::unique_ptr<FilterBlockReader> BlockBasedFilterBlockReader::Create(
184185
use_cache, nullptr /* get_context */,
185186
lookup_context, &filter_block);
186187
if (!s.ok()) {
188+
IGNORE_STATUS_IF_ERROR(s);
187189
return std::unique_ptr<FilterBlockReader>();
188190
}
189191

table/block_based/block_based_table_reader.cc

+5-5
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "table/sst_file_writer_collectors.h"
5353
#include "table/two_level_iterator.h"
5454

55+
#include "test_util/testharness.h"
5556
#include "monitoring/perf_context_imp.h"
5657
#include "port/lang.h"
5758
#include "test_util/sync_point.h"
@@ -728,6 +729,7 @@ Status BlockBasedTable::PrefetchTail(
728729
nullptr, 0, 0, true /* enable */, true /* track_min_offset */));
729730
s = (*prefetch_buffer)->Prefetch(file, prefetch_off, prefetch_len);
730731
}
732+
731733
return s;
732734
}
733735

@@ -788,10 +790,12 @@ Status BlockBasedTable::ReadPropertiesBlock(
788790
nullptr /* ret_block_handle */, nullptr /* ret_block_contents */,
789791
false /* compression_type_missing */, nullptr /* memory_allocator */);
790792
}
793+
IGNORE_STATUS_IF_ERROR(s);
791794

792795
if (s.IsCorruption()) {
793796
s = TryReadPropertiesWithGlobalSeqno(prefetch_buffer, meta_iter->value(),
794797
&table_properties);
798+
IGNORE_STATUS_IF_ERROR(s);
795799
}
796800
std::unique_ptr<TableProperties> props_guard;
797801
if (table_properties != nullptr) {
@@ -890,6 +894,7 @@ Status BlockBasedTable::ReadRangeDelBlock(
890894
rep_->ioptions.info_log,
891895
"Encountered error while reading data from range del block %s",
892896
s.ToString().c_str());
897+
IGNORE_STATUS_IF_ERROR(s);
893898
} else {
894899
rep_->fragmented_range_dels =
895900
std::make_shared<FragmentedRangeTombstoneList>(std::move(iter),
@@ -994,11 +999,6 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
994999
auto filter = new_table->CreateFilterBlockReader(
9951000
prefetch_buffer, use_cache, prefetch_filter, pin_filter,
9961001
lookup_context);
997-
#ifndef NDEBUG
998-
if (rep_->filter_type != Rep::FilterType::kNoFilter && !filter) {
999-
TEST_SYNC_POINT("FilterReadError");
1000-
}
1001-
#endif
10021002
if (filter) {
10031003
// Refer to the comment above about paritioned indexes always being cached
10041004
if (prefetch_all) {

table/block_based/full_filter_block.cc

+4-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "port/port.h"
1212
#include "rocksdb/filter_policy.h"
1313
#include "table/block_based/block_based_table_reader.h"
14+
#include "test_util/testharness.h"
1415
#include "util/coding.h"
1516

1617
namespace ROCKSDB_NAMESPACE {
@@ -132,6 +133,7 @@ std::unique_ptr<FilterBlockReader> FullFilterBlockReader::Create(
132133
use_cache, nullptr /* get_context */,
133134
lookup_context, &filter_block);
134135
if (!s.ok()) {
136+
IGNORE_STATUS_IF_ERROR(s);
135137
return std::unique_ptr<FilterBlockReader>();
136138
}
137139

@@ -164,7 +166,7 @@ bool FullFilterBlockReader::MayMatch(
164166
const Status s =
165167
GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block);
166168
if (!s.ok()) {
167-
TEST_SYNC_POINT("FilterReadError");
169+
IGNORE_STATUS_IF_ERROR(s);
168170
return true;
169171
}
170172

@@ -222,7 +224,7 @@ void FullFilterBlockReader::MayMatch(
222224
const Status s = GetOrReadFilterBlock(no_io, range->begin()->get_context,
223225
lookup_context, &filter_block);
224226
if (!s.ok()) {
225-
TEST_SYNC_POINT("FilterReadError");
227+
IGNORE_STATUS_IF_ERROR(s);
226228
return;
227229
}
228230

table/block_based/partitioned_filter_block.cc

+6-13
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "rocksdb/filter_policy.h"
1414
#include "table/block_based/block.h"
1515
#include "table/block_based/block_based_table_reader.h"
16+
#include "test_util/testharness.h"
1617
#include "util/coding.h"
1718

1819
namespace ROCKSDB_NAMESPACE {
@@ -143,6 +144,7 @@ std::unique_ptr<FilterBlockReader> PartitionedFilterBlockReader::Create(
143144
use_cache, nullptr /* get_context */,
144145
lookup_context, &filter_block);
145146
if (!s.ok()) {
147+
IGNORE_STATUS_IF_ERROR(s);
146148
return std::unique_ptr<FilterBlockReader>();
147149
}
148150

@@ -254,7 +256,7 @@ bool PartitionedFilterBlockReader::MayMatch(
254256
Status s =
255257
GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block);
256258
if (UNLIKELY(!s.ok())) {
257-
TEST_SYNC_POINT("FilterReadError");
259+
IGNORE_STATUS_IF_ERROR(s);
258260
return true;
259261
}
260262

@@ -272,7 +274,7 @@ bool PartitionedFilterBlockReader::MayMatch(
272274
no_io, get_context, lookup_context,
273275
&filter_partition_block);
274276
if (UNLIKELY(!s.ok())) {
275-
TEST_SYNC_POINT("FilterReadError");
277+
IGNORE_STATUS_IF_ERROR(s);
276278
return true;
277279
}
278280

@@ -312,7 +314,7 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) {
312314
"Error retrieving top-level filter block while trying to "
313315
"cache filter partitions: %s",
314316
s.ToString().c_str());
315-
TEST_SYNC_POINT("FilterReadError");
317+
IGNORE_STATUS_IF_ERROR(s);
316318
return;
317319
}
318320

@@ -343,11 +345,6 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) {
343345
prefetch_buffer.reset(new FilePrefetchBuffer());
344346
s = prefetch_buffer->Prefetch(rep->file.get(), prefetch_off,
345347
static_cast<size_t>(prefetch_len));
346-
#ifndef NDEBUG
347-
if (!s.ok()) {
348-
TEST_SYNC_POINT("FilterReadError");
349-
}
350-
#endif
351348

352349
// After prefetch, read the partitions one by one
353350
ReadOptions read_options;
@@ -370,11 +367,7 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) {
370367
}
371368
}
372369
}
373-
#ifndef NDEBUG
374-
if (!s.ok()) {
375-
TEST_SYNC_POINT("FilterReadError");
376-
}
377-
#endif
370+
IGNORE_STATUS_IF_ERROR(s);
378371
}
379372
}
380373

table/block_based/partitioned_index_reader.cc

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
// found in the LICENSE file. See the AUTHORS file for names of contributors.
99
#include "table/block_based/partitioned_index_reader.h"
1010
#include "table/block_based/partitioned_index_iterator.h"
11+
#include "test_util/testharness.h"
1112

1213
namespace ROCKSDB_NAMESPACE {
1314
Status PartitionIndexReader::Create(
@@ -116,6 +117,7 @@ void PartitionIndexReader::CacheDependencies(bool pin) {
116117
"Error retrieving top-level index block while trying to "
117118
"cache index partitions: %s",
118119
s.ToString().c_str());
120+
IGNORE_STATUS_IF_ERROR(s);
119121
return;
120122
}
121123

@@ -162,6 +164,8 @@ void PartitionIndexReader::CacheDependencies(bool pin) {
162164
&block, BlockType::kIndex, /*get_context=*/nullptr, &lookup_context,
163165
/*contents=*/nullptr);
164166

167+
IGNORE_STATUS_IF_ERROR(s);
168+
165169
assert(s.ok() || block.GetValue() == nullptr);
166170
if (s.ok() && block.GetValue() != nullptr) {
167171
if (block.IsCached()) {

test_util/fault_injection_test_fs.cc

+6-4
Original file line numberDiff line numberDiff line change
@@ -473,13 +473,14 @@ IOStatus FaultInjectionTestFS::InjectError(ErrorOperation op,
473473
switch (op) {
474474
case kRead:
475475
{
476-
uint32_t type = ctx->rand.Uniform(3);
476+
ErrorType type =
477+
static_cast<ErrorType>(ctx->rand.Uniform(ErrorType::kErrorTypeMax));
477478
switch (type) {
478479
// Inject IO error
479-
case 0:
480+
case ErrorType::kErrorTypeStatus:
480481
return IOStatus::IOError();
481482
// Inject random corruption
482-
case 1:
483+
case ErrorType::kErrorTypeCorruption:
483484
{
484485
if (result->data() == scratch) {
485486
uint64_t offset = ctx->rand.Uniform((uint32_t)result->size());
@@ -496,7 +497,7 @@ IOStatus FaultInjectionTestFS::InjectError(ErrorOperation op,
496497
}
497498
}
498499
// Truncate the result
499-
case 2:
500+
case ErrorType::kErrorTypeTruncated:
500501
{
501502
assert(result->size() > 0);
502503
uint64_t offset = ctx->rand.Uniform((uint32_t)result->size());
@@ -525,6 +526,7 @@ void FaultInjectionTestFS::PrintFaultBacktrace() {
525526
if (ctx == nullptr) {
526527
return;
527528
}
529+
fprintf(stderr, "Injected error type = %d\n", ctx->type);
528530
port::PrintAndFreeStack(ctx->callstack, ctx->frames);
529531
ctx->callstack = nullptr;
530532
#endif

test_util/fault_injection_test_fs.h

+8
Original file line numberDiff line numberDiff line change
@@ -355,13 +355,21 @@ class FaultInjectionTestFS : public FileSystemWrapper {
355355
// to underlying FS for writable files
356356
IOStatus error_;
357357

358+
enum ErrorType : int {
359+
kErrorTypeStatus = 0,
360+
kErrorTypeCorruption,
361+
kErrorTypeTruncated,
362+
kErrorTypeMax
363+
};
364+
358365
struct ErrorContext {
359366
Random rand;
360367
int one_in;
361368
int count;
362369
bool enable_error_injection;
363370
void* callstack;
364371
int frames;
372+
ErrorType type;
365373

366374
explicit ErrorContext(uint32_t seed)
367375
: rand(seed),

test_util/testharness.h

+14-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ ::testing::AssertionResult AssertStatus(const char* s_expr, const Status& s);
4242
#define EXPECT_OK(s) \
4343
EXPECT_PRED_FORMAT1(ROCKSDB_NAMESPACE::test::AssertStatus, s)
4444
#define EXPECT_NOK(s) EXPECT_FALSE((s).ok())
45-
4645
} // namespace test
46+
47+
// Callback sync point for any read IO errors that should be ignored by
48+
// the fault injection framework
49+
#ifdef NDEBUG
50+
// Disable in release mode
51+
#define IGNORE_STATUS_IF_ERROR(_status_)
52+
#else
53+
#define IGNORE_STATUS_IF_ERROR(_status_) \
54+
{ \
55+
if (!_status_.ok()) { \
56+
TEST_SYNC_POINT("FaultInjectionIgnoreError"); \
57+
} \
58+
}
59+
#endif // NDEBUG
4760
} // namespace ROCKSDB_NAMESPACE

0 commit comments

Comments
 (0)