Skip to content

Commit

Permalink
[core] More failpoint crash testing (MystenLabs#10445)
Browse files Browse the repository at this point in the history
## Description 

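Adds more failpoints for crash testing: `batch-write` and `put-cf` are split into `-before`/`-after` variants, and new `delete-cf-before`/`delete-cf-after` and `highest-executed-checkpoint` failpoints are added.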

## Test Plan 

Ran the following with no observed failures:

```
for f in `seq 1 50`; do RUST_LOG=sui=debug,error MSIM_TEST_SEED=9$f cargo simtest test_simulated_load_reconfig_crashes -E '(test(test_simulated_load_reconfig_crashes) and !(test(test_simulated_load_reconfig_crashes_during_epoch_change)))' --no-capture > test_$f 2>&1 & done
```

---
If your changes are not user-facing and not a breaking change, you can
skip the following section. Otherwise, please indicate what changed, and
then add to the Release Notes section as highlighted during the release
process.

### Type of Change (Check all that apply)

- [ ] user-visible impact
- [ ] breaking change for client SDKs
- [ ] breaking change for FNs (FN binary must upgrade)
- [ ] breaking change for validators or node operators (must upgrade
binaries)
- [ ] breaking change for on-chain data layout
- [ ] necessitates either a data wipe or data migration

### Release notes
  • Loading branch information
williampsmith authored Apr 6, 2023
1 parent def18a6 commit aed2b46
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 8 deletions.
13 changes: 11 additions & 2 deletions crates/sui-benchmark/tests/simtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,18 @@ mod test {
let dead_validator = dead_validator_orig.clone();
let client_node = sui_simulator::current_simnode_id();
register_fail_points(
&["batch-write", "transaction-commit", "put-cf"],
&[
"batch-write-before",
"batch-write-after",
"put-cf-before",
"put-cf-after",
"delete-cf-before",
"delete-cf-after",
"transaction-commit",
"highest-executed-checkpoint",
],
move || {
handle_failpoint(dead_validator.clone(), client_node, 0.01);
handle_failpoint(dead_validator.clone(), client_node, 0.02);
},
);

Expand Down
4 changes: 4 additions & 0 deletions crates/sui-core/src/checkpoints/checkpoint_executor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ use itertools::izip;
use mysten_metrics::{spawn_monitored_task, MonitoredFutureExt};
use prometheus::Registry;
use sui_config::node::CheckpointExecutorConfig;
use sui_macros::{fail_point, fail_point_async};
use sui_types::message_envelope::Message;
use sui_types::messages::VerifiedExecutableTransaction;
use sui_types::{
Expand Down Expand Up @@ -156,6 +157,7 @@ impl CheckpointExecutor {
pending.is_empty(),
"Pending checkpoint execution buffer should be empty after processing last checkpoint of epoch",
);
fail_point_async!("crash");
return;
}
self.schedule_synced_checkpoints(
Expand Down Expand Up @@ -240,6 +242,8 @@ impl CheckpointExecutor {
info!("Finished syncing and executing checkpoint {}", seq);
}

fail_point!("highest-executed-checkpoint");

self.checkpoint_store
.update_highest_executed_checkpoint(checkpoint)
.unwrap();
Expand Down
22 changes: 16 additions & 6 deletions crates/typed-store/src/rocks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,11 @@ impl RocksDB {
key: K,
writeopts: &WriteOptions,
) -> Result<(), rocksdb::Error> {
delegate_call!(self.delete_cf_opt(cf, key, writeopts))
fail_point!("delete-cf-before");
let ret = delegate_call!(self.delete_cf_opt(cf, key, writeopts));
fail_point!("delete-cf-after");
#[allow(clippy::let_and_return)]
ret
}

pub fn path(&self) -> &Path {
Expand All @@ -276,8 +280,11 @@ impl RocksDB {
K: AsRef<[u8]>,
V: AsRef<[u8]>,
{
fail_point!("put-cf");
delegate_call!(self.put_cf_opt(cf, key, value, writeopts))
fail_point!("put-cf-before");
let ret = delegate_call!(self.put_cf_opt(cf, key, value, writeopts));
fail_point!("put-cf-after");
#[allow(clippy::let_and_return)]
ret
}

pub fn key_may_exist_cf<K: AsRef<[u8]>>(
Expand All @@ -294,8 +301,8 @@ impl RocksDB {
}

pub fn write(&self, batch: RocksDBBatch) -> Result<(), TypedStoreError> {
fail_point!("batch-write");
match (self, batch) {
fail_point!("batch-write-before");
let ret = match (self, batch) {
(RocksDB::DBWithThreadMode(db), RocksDBBatch::Regular(batch)) => {
db.underlying.write(batch)?;
Ok(())
Expand All @@ -307,7 +314,10 @@ impl RocksDB {
_ => Err(TypedStoreError::RocksDBError(
"using invalid batch type for the database".to_string(),
)),
}
};
fail_point!("batch-write-after");
#[allow(clippy::let_and_return)]
ret
}

pub fn transaction_without_snapshot(
Expand Down

0 comments on commit aed2b46

Please sign in to comment.