Skip to content

Commit

Permalink
Use constant (60s) certificate sequencing timeout (MystenLabs#4961)
Browse files Browse the repository at this point in the history
Increase observability and hopefully resilience of consensus adapter:
1. Add more fine grained buckets for the `sequencing_certificate_latency` metric. Also change its unit to sec. Currently the largest bucket is 10 (ms).
2. Add a metric for inflight (non-checkpoint) consensus transactions being sequenced.
3. Reduce capacity of consensus listener from 1M pending txns to 2000, which is calculated from `200 tps * 5 sec consensus latency * 2 (margin) ~ 2000`.
4. Use a uniform 60s certificate sequencing timeout:
    - The existing adaptive control algorithm (for user consensus) can reduce the timeout a bit too low, e.g. delay_ms can drop to 2.5s, so the timeout value becomes 2.5s + 15s ~ 17.5s. Consensus transaction timeouts result in amplified traffic so it is best avoided. Using a higher timeout by default would avoid this scenario.
  • Loading branch information
mwtian authored Oct 4, 2022
1 parent 2602d45 commit 93f796a
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 107 deletions.
2 changes: 1 addition & 1 deletion crates/sui-config/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ impl<R: ::rand::RngCore + ::rand::CryptoRng> ConfigBuilder<R> {
let consensus_config = ConsensusConfig {
consensus_address,
consensus_db_path,
delay_step: Some(15_000),
timeout_secs: Some(60),
narwhal_config: Default::default(),
};

Expand Down
4 changes: 3 additions & 1 deletion crates/sui-config/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ impl NodeConfig {
pub struct ConsensusConfig {
pub consensus_address: Multiaddr,
pub consensus_db_path: PathBuf,
pub delay_step: Option<u64>,
// Timeout to retry sending transaction to consensus internally.
// Default to 60s.
pub timeout_secs: Option<u64>,

pub narwhal_config: ConsensusParameters,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ validator_configs:
consensus-config:
consensus-address: ""
consensus-db-path: /tmp/foo/
delay-step: 15000
timeout-secs: 60
narwhal-config:
header_size: 1000
max_header_delay: 100ms
Expand Down Expand Up @@ -59,7 +59,7 @@ validator_configs:
consensus-config:
consensus-address: ""
consensus-db-path: /tmp/foo/
delay-step: 15000
timeout-secs: 60
narwhal-config:
header_size: 1000
max_header_delay: 100ms
Expand Down Expand Up @@ -102,7 +102,7 @@ validator_configs:
consensus-config:
consensus-address: ""
consensus-db-path: /tmp/foo/
delay-step: 15000
timeout-secs: 60
narwhal-config:
header_size: 1000
max_header_delay: 100ms
Expand Down Expand Up @@ -145,7 +145,7 @@ validator_configs:
consensus-config:
consensus-address: ""
consensus-db-path: /tmp/foo/
delay-step: 15000
timeout-secs: 60
narwhal-config:
header_size: 1000
max_header_delay: 100ms
Expand Down Expand Up @@ -188,7 +188,7 @@ validator_configs:
consensus-config:
consensus-address: ""
consensus-db-path: /tmp/foo/
delay-step: 15000
timeout-secs: 60
narwhal-config:
header_size: 1000
max_header_delay: 100ms
Expand Down Expand Up @@ -231,7 +231,7 @@ validator_configs:
consensus-config:
consensus-address: ""
consensus-db-path: /tmp/foo/
delay-step: 15000
timeout-secs: 60
narwhal-config:
header_size: 1000
max_header_delay: 100ms
Expand Down Expand Up @@ -274,7 +274,7 @@ validator_configs:
consensus-config:
consensus-address: ""
consensus-db-path: /tmp/foo/
delay-step: 15000
timeout-secs: 60
narwhal-config:
header_size: 1000
max_header_delay: 100ms
Expand Down
29 changes: 18 additions & 11 deletions crates/sui-core/src/authority_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ mod server_tests;
const MIN_BATCH_SIZE: u64 = 1000;
const MAX_DELAY_MILLIS: u64 = 5_000; // 5 sec

// Assuming 200 consensus tps * 5 sec consensus latency = 1000 inflight consensus txns.
// Leaving a bit more headroom to cap the max inflight consensus txns to 1000*2 = 2000.
const MAX_PENDING_CONSENSUS_TRANSACTIONS: u64 = 2000;

pub struct AuthorityServerHandle {
tx_cancellation: tokio::sync::oneshot::Sender<()>,
local_addr: Multiaddr,
Expand Down Expand Up @@ -93,7 +97,7 @@ impl AuthorityServer {
consensus_address,
state.clone_committee(),
tx_consensus_listener,
/* max_delay */ Duration::from_millis(20_000),
Duration::from_secs(20),
metrics,
);

Expand Down Expand Up @@ -286,21 +290,17 @@ impl ValidatorService {

// Spawn a consensus listener. It listen for consensus outputs and notifies the
// authority server when a sequenced transaction is ready for execution.
ConsensusListener::spawn(
rx_sui_to_consensus,
rx_consensus_to_sui,
/* max_pending_transactions */ 1_000_000,
);
ConsensusListener::spawn(rx_sui_to_consensus, rx_consensus_to_sui);

let timeout = Duration::from_secs(consensus_config.timeout_secs.unwrap_or(60));
let ca_metrics = ConsensusAdapterMetrics::new(&prometheus_registry);

let delay_step = consensus_config.delay_step.unwrap_or(15_000);
// The consensus adapter allows the authority to send user certificates through consensus.
let consensus_adapter = ConsensusAdapter::new(
consensus_config.address().to_owned(),
state.clone_committee(),
tx_sui_to_consensus.clone(),
Duration::from_millis(delay_step),
timeout,
ca_metrics.clone(),
);

Expand All @@ -318,7 +318,7 @@ impl ValidatorService {
/* tx_consensus_listener */ tx_sui_to_consensus,
rx_checkpoint_consensus_adapter,
/* checkpoint_locals */ state.checkpoints(),
/* retry_delay */ Duration::from_millis(5_000),
/* retry_delay */ timeout,
/* max_pending_transactions */ 10_000,
ca_metrics,
)
Expand Down Expand Up @@ -352,7 +352,7 @@ impl ValidatorService {
.verify()
.map_err(|e| tonic::Status::invalid_argument(e.to_string()))?;
drop(tx_verif_metrics_guard);
//TODO This is really really bad, we should have different types for signature-verified transactions
// TODO This is really really bad, we should have different types for signature-verified transactions
transaction.is_verified = true;

let tx_digest = transaction.digest();
Expand Down Expand Up @@ -381,6 +381,7 @@ impl ValidatorService {
) -> Result<tonic::Response<TransactionInfoResponse>, tonic::Status> {
let mut certificate = request.into_inner();
let is_consensus_tx = certificate.contains_shared_object();

let _metrics_guard = start_timer(if is_consensus_tx {
metrics.handle_certificate_consensus_latency.clone()
} else {
Expand All @@ -394,7 +395,7 @@ impl ValidatorService {
.verify(&state.committee.load())
.map_err(|e| tonic::Status::invalid_argument(e.to_string()))?;
drop(cert_verif_metrics_guard);
//TODO This is really really bad, we should have different types for signature verified transactions
// TODO This is really really bad, we should have different types for signature verified transactions
certificate.is_verified = true;

// 2) Check idempotency
Expand All @@ -415,6 +416,12 @@ impl ValidatorService {
.await
.map_err(|e| tonic::Status::internal(e.to_string()))?
{
// Note that num_inflight_transactions() only include user submitted transactions, and only user txns can be dropped here.
// This backpressure should not affect system transactions, e.g. for checkpointing.
if consensus_adapter.num_inflight_transactions() > MAX_PENDING_CONSENSUS_TRANSACTIONS {
return Err(tonic::Status::resource_exhausted("Reached {MAX_PENDING_CONSENSUS_TRANSACTIONS} concurrent consensus transactions",
));
}
let _metrics_guard = start_timer(metrics.consensus_latency.clone());
consensus_adapter
.submit(&state.name, &certificate)
Expand Down
Loading

0 comments on commit 93f796a

Please sign in to comment.