Skip to content

Commit

Permalink
Add a passive reconfiguration test (MystenLabs#7045)
Browse files Browse the repository at this point in the history
The test is currently flaky (need to hunt down the non-determinism), and
so is ignored. To run:

```
cargo simtest  --test reconfiguration_tests test_passive_reconfig --run-ignored all
```
  • Loading branch information
mystenmark authored Dec 30, 2022
1 parent b52e0d6 commit 5ad8e2a
Show file tree
Hide file tree
Showing 13 changed files with 92 additions and 21 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion crates/sui-config/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub struct ConfigBuilder<R = OsRng> {
initial_accounts_config: Option<GenesisConfig>,
with_swarm: bool,
validator_ip_sel: ValidatorIpSelection,
checkpoints_per_epoch: Option<u64>,
}

impl ConfigBuilder {
Expand All @@ -60,6 +61,7 @@ impl ConfigBuilder {
} else {
ValidatorIpSelection::Localhost
},
checkpoints_per_epoch: default_checkpoints_per_epoch(),
}
}
}
Expand Down Expand Up @@ -95,6 +97,11 @@ impl<R> ConfigBuilder<R> {
self
}

pub fn with_checkpoints_per_epoch(mut self, ckpts: u64) -> Self {
self.checkpoints_per_epoch = Some(ckpts);
self
}

pub fn rng<N: rand::RngCore + rand::CryptoRng>(self, rng: N) -> ConfigBuilder<N> {
ConfigBuilder {
rng: Some(rng),
Expand All @@ -104,6 +111,7 @@ impl<R> ConfigBuilder<R> {
initial_accounts_config: self.initial_accounts_config,
with_swarm: self.with_swarm,
validator_ip_sel: self.validator_ip_sel,
checkpoints_per_epoch: self.checkpoints_per_epoch,
}
}
}
Expand Down Expand Up @@ -315,7 +323,7 @@ impl<R: rand::RngCore + rand::CryptoRng> ConfigBuilder<R> {
consensus_config: Some(consensus_config),
enable_event_processing: false,
enable_checkpoint: false,
checkpoints_per_epoch: default_checkpoints_per_epoch(),
checkpoints_per_epoch: self.checkpoints_per_epoch,
genesis: crate::node::Genesis::new(genesis.clone()),
grpc_load_shed: initial_accounts_config.grpc_load_shed,
grpc_concurrency_limit: initial_accounts_config.grpc_concurrency_limit,
Expand Down
2 changes: 1 addition & 1 deletion crates/sui-core/src/checkpoints/checkpoint_executor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ impl CheckpointExecutorEventLoop {
checkpoint: VerifiedCheckpoint,
pending: &mut CheckpointExecutionBuffer,
) -> SuiResult {
info!(
debug!(
"Scheduling checkpoint {:?} for execution",
checkpoint.sequence_number()
);
Expand Down
5 changes: 3 additions & 2 deletions crates/sui-core/src/checkpoints/checkpoint_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,11 @@ impl CheckpointOutput for LogCheckpointOutput {
summary.sequence_number, contents
);
info!(
"Creating checkpoint {:?} at sequence {}, previous digest {:?}, transactions count {}, content digest {:?}",
"Creating checkpoint {:?} at epoch {}, sequence {}, previous digest {:?}, transactions count {}, content digest {:?}",
Hex::encode(summary.digest()),
summary.epoch,
summary.sequence_number,
summary.previous_digest,
summary.previous_digest.map(Hex::encode),
contents.size(),
Hex::encode(summary.content_digest),
);
Expand Down
13 changes: 8 additions & 5 deletions crates/sui-core/src/checkpoints/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use sui_types::messages_checkpoint::{
CheckpointSequenceNumber, CheckpointSignatureMessage, CheckpointSummary, VerifiedCheckpoint,
};
use tokio::sync::{mpsc, watch, Notify};
use tracing::{debug, error, info, warn};
use tracing::{debug, error, warn};
use typed_store::rocks::{DBMap, TypedStoreError};
use typed_store::traits::{TableSummary, TypedStoreDebug};
use typed_store::Map;
Expand Down Expand Up @@ -415,9 +415,12 @@ impl CheckpointBuilder {
)
.await?;
}
if effects.is_empty() {
return Ok(None);
}

// Note: empty checkpoints are ok - they shouldn't happen at all on a network with even
// modest load. Even if they do happen, it is still useful as it allows fullnodes to
// distinguish between "no transactions have happened" and "i am not receiving new
// checkpoints".

let contents = CheckpointContents::new_with_causally_ordered_transactions(
effects.iter().map(TransactionEffects::execution_digests),
);
Expand Down Expand Up @@ -830,7 +833,7 @@ impl CheckpointServiceNotify for CheckpointService {
return Ok(());
}
}
info!(
debug!(
"Received signature for checkpoint sequence {}, digest {} from {}",
sequence,
Hex::encode(info.summary.summary.digest()),
Expand Down
2 changes: 1 addition & 1 deletion crates/sui-macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ syn = "1.0.104"
workspace-hack.workspace = true

[target.'cfg(msim)'.dependencies]
msim-macros = { git = "https://github.com/MystenLabs/mysten-sim.git", rev = "3aeccfeb0f021ad5801c1ab4dcf4004de7780847", package = "msim-macros" }
msim-macros = { git = "https://github.com/MystenLabs/mysten-sim.git", rev = "465f7d6c351e83003da9c9b77e8e3e352be7621e", package = "msim-macros" }
2 changes: 1 addition & 1 deletion crates/sui-simulator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ fastcrypto = { workspace = true, features = ["copy_key"] }
telemetry-subscribers.workspace = true

[target.'cfg(msim)'.dependencies]
msim = { git = "https://github.com/MystenLabs/mysten-sim.git", rev = "3aeccfeb0f021ad5801c1ab4dcf4004de7780847", package = "msim" }
msim = { git = "https://github.com/MystenLabs/mysten-sim.git", rev = "465f7d6c351e83003da9c9b77e8e3e352be7621e", package = "msim" }
12 changes: 12 additions & 0 deletions crates/sui-swarm/src/memory/swarm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub struct SwarmBuilder<R = OsRng> {
fullnode_count: usize,
fullnode_rpc_addr: Option<SocketAddr>,
with_event_store: bool,
checkpoints_per_epoch: Option<u64>,
}

impl SwarmBuilder {
Expand All @@ -40,6 +41,7 @@ impl SwarmBuilder {
fullnode_count: 0,
fullnode_rpc_addr: None,
with_event_store: false,
checkpoints_per_epoch: None,
}
}
}
Expand All @@ -54,6 +56,7 @@ impl<R> SwarmBuilder<R> {
fullnode_count: self.fullnode_count,
fullnode_rpc_addr: self.fullnode_rpc_addr,
with_event_store: false,
checkpoints_per_epoch: None,
}
}

Expand Down Expand Up @@ -94,6 +97,11 @@ impl<R> SwarmBuilder<R> {
self.fullnode_rpc_addr = Some(fullnode_rpc_addr);
self
}

pub fn with_checkpoints_per_epoch(mut self, ckpts: u64) -> Self {
self.checkpoints_per_epoch = Some(ckpts);
self
}
}

impl<R: rand::RngCore + rand::CryptoRng> SwarmBuilder<R> {
Expand All @@ -111,6 +119,10 @@ impl<R: rand::RngCore + rand::CryptoRng> SwarmBuilder<R> {
config_builder = config_builder.initial_accounts_config(initial_accounts_config);
}

if let Some(checkpoints_per_epoch) = self.checkpoints_per_epoch {
config_builder = config_builder.with_checkpoints_per_epoch(checkpoints_per_epoch);
}

let network_config = config_builder
.committee(self.committee)
.with_swarm()
Expand Down
4 changes: 4 additions & 0 deletions crates/sui/tests/full_node_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ async fn test_full_node_follows_txes() -> Result<(), anyhow::Error> {

let context = &mut test_cluster.wallet;

// TODO: test fails on CI due to flakiness without this. Once https://github.com/MystenLabs/sui/pull/7056 is
// merged we should be able to root out the flakiness.
sleep(Duration::from_millis(10)).await;

let (transferred_object, _, receiver, digest, _, _) = transfer_coin(context).await?;

wait_for_tx(digest, node.state().clone()).await;
Expand Down
36 changes: 32 additions & 4 deletions crates/sui/tests/reconfiguration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,17 @@ use sui_core::authority_aggregator::{
};
use sui_core::authority_client::AuthorityAPI;
use sui_core::test_utils::init_local_authorities;
use sui_macros::sim_test;
use sui_types::error::SuiError;
use sui_types::gas::GasCostSummary;
use sui_types::messages::VerifiedTransaction;
use test_utils::authority::{spawn_test_authorities, test_authority_configs};
use test_utils::{
authority::{spawn_test_authorities, test_authority_configs},
network::TestClusterBuilder,
};
use tokio::time::sleep;

#[tokio::test]
#[sim_test]
async fn local_advance_epoch_tx_test() {
// This test checks the following functionalities related to advance epoch transaction:
// 1. The create_advance_epoch_tx_cert API in AuthorityState can properly sign an advance
Expand All @@ -43,7 +48,7 @@ async fn local_advance_epoch_tx_test() {
advance_epoch_tx_test_impl(states, &certifier).await;
}

#[tokio::test]
#[sim_test]
async fn network_advance_epoch_tx_test() {
// Same as local_advance_epoch_tx_test, but uses network clients.
let authorities = spawn_test_authorities([].into_iter(), &test_authority_configs()).await;
Expand Down Expand Up @@ -95,9 +100,13 @@ async fn advance_epoch_tx_test_impl(
}
}

#[tokio::test]
#[sim_test]
async fn basic_reconfig_end_to_end_test() {
let authorities = spawn_test_authorities([].into_iter(), &test_authority_configs()).await;

// TODO: If this line is removed, the validators never advance to the next epoch
tokio::time::sleep(Duration::from_secs(1)).await;

// Close epoch on 3 (2f+1) validators.
for handle in authorities.iter().skip(1) {
handle.with(|node| node.close_epoch().unwrap());
Expand All @@ -118,6 +127,25 @@ async fn basic_reconfig_end_to_end_test() {
.collect();
join_all(handles).await;
}

// This test just starts up a cluster that reconfigures itself under 0 load.
#[sim_test]
#[ignore] // test is flaky right now
async fn test_passive_reconfig() {
let _test_cluster = TestClusterBuilder::new()
.with_checkpoints_per_epoch(10)
.build()
.await
.unwrap();

let duration_secs: u64 = std::env::var("RECONFIG_TEST_DURATION")
.ok()
.map(|v| v.parse().unwrap())
.unwrap_or(30);

sleep(Duration::from_secs(duration_secs)).await;
}

/*
use futures::future::join_all;
Expand Down
11 changes: 11 additions & 0 deletions crates/test-utils/src/network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ pub struct TestClusterBuilder {
num_validators: Option<usize>,
fullnode_rpc_port: Option<u16>,
enable_fullnode_events: bool,
checkpoints_per_epoch: Option<u64>,
}

impl TestClusterBuilder {
Expand All @@ -117,6 +118,7 @@ impl TestClusterBuilder {
fullnode_rpc_port: None,
num_validators: None,
enable_fullnode_events: false,
checkpoints_per_epoch: None,
}
}

Expand All @@ -140,6 +142,11 @@ impl TestClusterBuilder {
self
}

pub fn with_checkpoints_per_epoch(mut self, ckpts: u64) -> Self {
self.checkpoints_per_epoch = Some(ckpts);
self
}

pub async fn build(self) -> anyhow::Result<TestCluster> {
let cluster = self.start_test_network_with_customized_ports().await?;
Ok(cluster)
Expand Down Expand Up @@ -198,6 +205,10 @@ impl TestClusterBuilder {
builder = builder.initial_accounts_config(genesis_config);
}

if let Some(checkpoints_per_epoch) = self.checkpoints_per_epoch {
builder = builder.with_checkpoints_per_epoch(checkpoints_per_epoch);
}

let mut swarm = builder.build();
swarm.launch().await?;

Expand Down
8 changes: 6 additions & 2 deletions narwhal/primary/src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,15 @@ impl Core {
let mut join_all = futures::future::try_join_all(tasks);
loop {
tokio::select! {
_result = &mut join_all => {
_ = &mut join_all => {
// Reliable broadcast will not return errors.
return Ok(())
},
_result = rx_narwhal_round_updates.changed() => {
result = rx_narwhal_round_updates.changed() => {
if result.is_err() {
// this happens during reconfig when the other side hangs up.
return Ok(());
}
narwhal_round = *rx_narwhal_round_updates.borrow();
if narwhal_round > certificate_round {
// Round has advanced. No longer need to broadcast this cert to
Expand Down
4 changes: 2 additions & 2 deletions scripts/simtest/cargo-simtest
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ if [ -n "$LOCAL_MSIM_PATH" ]; then
else
cargo_patch_args=(
--config 'patch.crates-io.tokio.git = "https://github.com/MystenLabs/mysten-sim.git"'
--config 'patch.crates-io.tokio.rev = "3aeccfeb0f021ad5801c1ab4dcf4004de7780847"'
--config 'patch.crates-io.tokio.rev = "465f7d6c351e83003da9c9b77e8e3e352be7621e"'
--config 'patch.crates-io.futures-timer.git = "https://github.com/MystenLabs/mysten-sim.git"'
--config 'patch.crates-io.futures-timer.rev = "3aeccfeb0f021ad5801c1ab4dcf4004de7780847"'
--config 'patch.crates-io.futures-timer.rev = "465f7d6c351e83003da9c9b77e8e3e352be7621e"'
)
fi

Expand Down

0 comments on commit 5ad8e2a

Please sign in to comment.