Skip to content

Commit

Permalink
[benchmarking] add commands per second metric (MystenLabs#9442)
Browse files Browse the repository at this point in the history
## Description

Add logic to compute the number of commands per successfully executed
transaction and report commands per second (CPS) as part of the final
results. Needed to show how much computation we can do per tx now that
we have programmable txes.
 

Describe the changes or additions included in this PR.

## Test Plan 

`cargo run --package sui-benchmark --bin stress -- --num-client-threads
1 --num-server-threads 6 bench --target-qps 10 --in-flight-ratio 1
--transfer-object 0 --batch-payment 1 --batch-payment-size 15
--run-duration 10s`

```
+-------------+-----+-----+--------+---------------+---------------+---------------+
| duration(s) | tps | cps | error% | latency (min) | latency (p50) | latency (p99) |
+==================================================================================+
| 10          | 11  | 330 | 0      | 105           | 192           | 263           |
+-------------+-----+-----+--------+---------------+---------------+---------------+
```

Note: CPS is 330 because a batch payment to N recipients currently has N
* 2 commands (11 TPS * 30 commands/tx = 330). This is because we encode
paying N parties as `SplitCoin; Pay` N times. @tnowacki would it be
possible to encode paying N parties with N+1 commands by adding a
command (SplitCoinN?) or tweaking the existing encoding of `pay_sui`?

### Type of Change (Check all that apply)

- [ ] user-visible impact
- [ ] breaking change for a client SDKs
- [ ] breaking change for FNs (FN binary must upgrade)
- [ ] breaking change for validators or node operators (must upgrade
binaries)
- [ ] breaking change for on-chain data layout
- [ ] necessitate either a data wipe or data migration

### Release notes
  • Loading branch information
sblackshear authored Mar 17, 2023
1 parent 7ae008d commit 92f7935
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 48 deletions.
84 changes: 47 additions & 37 deletions crates/sui-benchmark/src/drivers/bench_driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use crate::ValidatorProxy;
use std::collections::{BTreeMap, VecDeque};
use std::sync::Arc;
use std::time::Duration;
use sui_types::messages::VerifiedTransaction;
use sui_types::messages::{TransactionDataAPI, VerifiedTransaction};
use sysinfo::{CpuExt, System, SystemExt};
use tokio::sync::Barrier;
use tokio::{time, time::Instant};
Expand Down Expand Up @@ -142,8 +142,16 @@ struct Stats {
}

type RetryType = Box<(VerifiedTransaction, Box<dyn Payload>)>;

enum NextOp {
Response(Option<(Duration, Box<dyn Payload>)>),
Response {
/// Time taken to execute the tx and produce effects
latency: Duration,
/// Number of commands in the executed transction
num_commands: u16,
/// The payload updated with the effects of the transaction
payload: Box<dyn Payload>,
},
Retry(RetryType),
}

Expand Down Expand Up @@ -308,8 +316,9 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
let runner = tokio::spawn(async move {
cloned_barrier.wait().await;
let start_time = print_and_start_benchmark().await;
let mut num_success = 0;
let mut num_error = 0;
let mut num_success_txes = 0;
let mut num_error_txes = 0;
let mut num_success_cmds = 0;
let mut num_no_gas = 0;
let mut num_in_flight: u64 = 0;
let mut num_submitted = 0;
Expand Down Expand Up @@ -337,17 +346,19 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
num_submitted,
bench_stats: BenchmarkStats {
duration: stat_start_time.elapsed(),
num_error,
num_success,
num_error_txes,
num_success_txes,
num_success_cmds,
latency_ms: HistogramWrapper {histogram: latency_histogram.clone()},
},
})
.is_err()
{
debug!("Failed to update stat!");
}
num_success = 0;
num_error = 0;
num_success_txes = 0;
num_error_txes = 0;
num_success_cmds = 0;
num_no_gas = 0;
num_submitted = 0;
stat_start_time = Instant::now();
Expand All @@ -358,7 +369,7 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
// If a retry is available send that
// (sending retries here subjects them to our rate limit)
if let Some(mut b) = retry_queue.pop_front() {
num_error += 1;
num_error_txes += 1;
num_submitted += 1;
metrics_cloned.num_submitted.with_label_values(&[&b.1.to_string()]).inc();
let metrics_cloned = metrics_cloned.clone();
Expand Down Expand Up @@ -388,11 +399,9 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
if let Some(sig_info) = effects.quorum_sig() {
sig_info.authorities(&committee_cloned).for_each(|name| metrics_cloned.validators_in_effects_cert.with_label_values(&[&name.unwrap().to_string()]).inc())
}
let num_commands = b.0.data().transaction_data().kind().num_commands() as u16;
b.1.make_new_payload(&effects);
NextOp::Response(Some((
latency,
b.1,
)))
NextOp::Response { latency, num_commands, payload: b.1 }
}
Err(err) => {
error!("{}", err);
Expand Down Expand Up @@ -441,10 +450,8 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
// auth_sign_info.authorities(&committee_cloned).for_each(|name| metrics_cloned.validators_in_tx_cert.with_label_values(&[&name.unwrap().to_string()]).inc());
if let Some(sig_info) = effects.quorum_sig() { sig_info.authorities(&committee_cloned).for_each(|name| metrics_cloned.validators_in_effects_cert.with_label_values(&[&name.unwrap().to_string()]).inc()) }
payload.make_new_payload(&effects);
NextOp::Response(Some((
latency,
payload,
)))
let num_commands = tx.data().transaction_data().kind().num_commands() as u16;
NextOp::Response { latency, num_commands, payload }
}
Err(err) => {
error!("Retry due to error: {}", err);
Expand All @@ -465,20 +472,17 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
break;
}
}
NextOp::Response(Some((latency, new_payload))) => {
num_success += 1;
NextOp::Response { latency, num_commands, payload } => {
num_success_txes += 1;
num_success_cmds += num_commands as u64;
num_in_flight -= 1;
free_pool.push(new_payload);
free_pool.push(payload);
latency_histogram.saturating_record(latency.as_millis().try_into().unwrap());
BenchDriver::update_progress(*start_time, run_duration, progress_cloned.clone());
if progress_cloned.is_finished() {
break;
}
}
NextOp::Response(None) => {
// num_in_flight -= 1;
unreachable!();
}
}
}
}
Expand All @@ -492,8 +496,9 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
num_submitted,
bench_stats: BenchmarkStats {
duration: stat_start_time.elapsed(),
num_error,
num_success,
num_error_txes,
num_success_txes,
num_success_cmds,
latency_ms: HistogramWrapper {
histogram: latency_histogram,
},
Expand All @@ -510,8 +515,9 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
let benchmark_stat_task = tokio::spawn(async move {
let mut benchmark_stat = BenchmarkStats {
duration: Duration::ZERO,
num_error: 0,
num_success: 0,
num_error_txes: 0,
num_success_txes: 0,
num_success_cmds: 0,
latency_ms: HistogramWrapper {
histogram: hdrhistogram::Histogram::<u64>::new_with_max(120_000, 3).unwrap(),
},
Expand All @@ -533,35 +539,39 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
benchmark_stat.update(start.elapsed(), &sample_stat.bench_stats);
stat_collection.insert(id, sample_stat);
let mut total_qps: f32 = 0.0;
let mut num_success: u64 = 0;
let mut num_error: u64 = 0;
let mut total_cps: f32 = 0.0;
let mut num_success_txes: u64 = 0;
let mut num_error_txes: u64 = 0;
let mut num_success_cmds = 0;
let mut latency_histogram =
hdrhistogram::Histogram::<u64>::new_with_max(120_000, 3).unwrap();

let mut num_in_flight: u64 = 0;
let mut num_submitted: u64 = 0;
let mut num_no_gas = 0;
for (_, v) in stat_collection.iter() {
total_qps +=
v.bench_stats.num_success as f32 / v.bench_stats.duration.as_secs() as f32;
num_success += v.bench_stats.num_success;
num_error += v.bench_stats.num_error;
let duration = v.bench_stats.duration.as_secs() as f32;
total_qps += v.bench_stats.num_success_txes as f32 / duration;
total_cps += v.bench_stats.num_success_cmds as f32 / duration;
num_success_txes += v.bench_stats.num_success_txes;
num_error_txes += v.bench_stats.num_error_txes;
num_success_cmds += v.bench_stats.num_success_cmds;
num_no_gas += v.num_no_gas;
num_submitted += v.num_submitted;
num_in_flight += v.num_in_flight;
latency_histogram
.add(&v.bench_stats.latency_ms.histogram)
.unwrap();
}
let denom = num_success + num_error;
let denom = num_success_txes + num_error_txes;
let _error_rate = if denom > 0 {
num_error as f32 / denom as f32
num_error_txes as f32 / denom as f32
} else {
0.0
};
counter += 1;
if counter % num_workers == 0 {
stat = format!("Throughput = {}, latency_ms(min/p50/p99/max) = {}/{}/{}/{}, num_success = {}, num_error = {}, no_gas = {}, submitted = {}, in_flight = {}", total_qps, latency_histogram.min(), latency_histogram.value_at_quantile(0.5), latency_histogram.value_at_quantile(0.99), latency_histogram.max(), num_success, num_error, num_no_gas, num_submitted, num_in_flight);
stat = format!("TPS = {}, CPS = {}, latency_ms(min/p50/p99/max) = {}/{}/{}/{}, num_success_tx = {}, num_error_tx = {}, num_success_cmds = {}, no_gas = {}, submitted = {}, in_flight = {}", total_qps, total_cps, latency_histogram.min(), latency_histogram.value_at_quantile(0.5), latency_histogram.value_at_quantile(0.99), latency_histogram.max(), num_success_txes, num_error_txes, num_success_cmds, num_no_gas, num_submitted, num_in_flight);
if show_progress {
eprintln!("{}", stat);
}
Expand Down
30 changes: 20 additions & 10 deletions crates/sui-benchmark/src/drivers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,21 @@ impl StressStats {
#[derive(serde::Serialize, serde::Deserialize, Debug)]
pub struct BenchmarkStats {
pub duration: Duration,
pub num_error: u64,
pub num_success: u64,
/// Number of transactions that ended in an error
pub num_error_txes: u64,
/// Number of transactions that were executed successfully
pub num_success_txes: u64,
/// Total number of commands in transactions that executed successfully
pub num_success_cmds: u64,
pub latency_ms: HistogramWrapper,
}

impl BenchmarkStats {
pub fn update(&mut self, duration: Duration, sample_stat: &BenchmarkStats) {
self.duration = duration;
self.num_error += sample_stat.num_error;
self.num_success += sample_stat.num_success;
self.num_error_txes += sample_stat.num_error_txes;
self.num_success_txes += sample_stat.num_success_txes;
self.num_success_cmds += sample_stat.num_success_cmds;
self.latency_ms
.histogram
.add(&sample_stat.latency_ms.histogram)
Expand All @@ -121,16 +126,19 @@ impl BenchmarkStats {
.set_header(vec![
"duration(s)",
"tps",
"cps",
"error%",
"latency (min)",
"latency (p50)",
"latency (p99)",
]);
let mut row = Row::new();
row.add_cell(Cell::new(self.duration.as_secs()));
row.add_cell(Cell::new(self.num_success / self.duration.as_secs()));
row.add_cell(Cell::new(self.num_success_txes / self.duration.as_secs()));
row.add_cell(Cell::new(self.num_success_cmds / self.duration.as_secs()));
row.add_cell(Cell::new(
(100 * self.num_error) as f32 / (self.num_error + self.num_success) as f32,
(100 * self.num_error_txes) as f32
/ (self.num_error_txes + self.num_success_txes) as f32,
));
row.add_cell(Cell::new(self.latency_ms.histogram.min()));
row.add_cell(Cell::new(self.latency_ms.histogram.value_at_quantile(0.5)));
Expand Down Expand Up @@ -201,8 +209,8 @@ impl BenchmarkCmp<'_> {
]
}
pub fn cmp_tps(&self) -> Comparison {
let old_tps = self.old.num_success / self.old.duration.as_secs();
let new_tps = self.new.num_success / self.new.duration.as_secs();
let old_tps = self.old.num_success_txes / self.old.duration.as_secs();
let new_tps = self.new.num_success_txes / self.new.duration.as_secs();
let diff = new_tps as i64 - old_tps as i64;
let diff_ratio = diff as f64 / old_tps as f64;
let speedup = 1.0 + diff_ratio;
Expand All @@ -216,8 +224,10 @@ impl BenchmarkCmp<'_> {
}
}
pub fn cmp_error_rate(&self) -> Comparison {
let old_error_rate = self.old.num_error / (self.old.num_error + self.old.num_success);
let new_error_rate = self.new.num_error / (self.new.num_error + self.new.num_success);
let old_error_rate =
self.old.num_error_txes / (self.old.num_error_txes + self.old.num_success_txes);
let new_error_rate =
self.new.num_error_txes / (self.new.num_error_txes + self.new.num_success_txes);
let diff = new_error_rate as i64 - old_error_rate as i64;
let diff_ratio = diff as f64 / old_error_rate as f64;
let speedup = 1.0 / (1.0 + diff_ratio);
Expand Down
2 changes: 1 addition & 1 deletion crates/sui-benchmark/tests/simtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ mod test {
.unwrap();

// TODO: make this stricter (== 0) when we have reliable error retrying on the client.
assert!(benchmark_stats.num_error < 30);
assert!(benchmark_stats.num_error_txes < 30);

tracing::info!("end of test {:?}", benchmark_stats);
}
Expand Down

0 comments on commit 92f7935

Please sign in to comment.