Skip to content

Commit

Permalink
Add fully-reproducible online tracer for banking (solana-labs#29196)
Browse files Browse the repository at this point in the history
* Add fully-reproducible online tracer for banking

* Don't use eprintln!()...

* Update programs/sbf/Cargo.lock...

* Remove meaningless assert_eq

* Group test-only code under aptly named mod

* Remove needless overflow handling in receive_until

* Delay stat aggregation as it's possible now

* Use Cow to avoid needless heap allocs

* Properly consume metrics action as soon as it is held

* Trace UnprocessedTransactionStorage::len() instead

* Loosen joining api over type safety for replaystage

* Introduce hash event to override these when simulating

* Use serde_with/serde_as instead of hacky workaround

* Update another Cargo.lock...

* Add detailed comment for Packet::buffer serialize

* Rename sender_overhead_minimized_receiver_loop()

* Use type inference for TraceError

* Another minor rename

* Retire now useless ForEach to simplify code

* Use type alias as much as possible

* Properly translate and propagate tracing errors

* Clarify --enable-banking-trace with better naming

* Consider unclean (signal-based) node restarts..

* Tweak logging and cli

* Remove Bank events as it's not needed anymore

* Make tpu own banking tracer thread

* Reduce diff a bit..

* Use latest serde_with

* Finally use the published rolling-file crate

* Make test code change more consistent

* Revive dead and non-terminating test code path...

* Dispose batches early now that it's possible

* Split off thread handle very early at ::new()

* Tweak message for TooSmallDirByteLimit

* Remove excessive indirection

* Remove needless pub from ::channel()

* Clarify test comments

* Avoid needless event creation if tracer is disabled

* Write tests around file rotation and spill-over

* Remove unneeded PathBuf::clone()s...

* Introduce inner struct instead of tuple...

* Remove unused enum BankStatus...

* Avoid .unwrap() for the case of disabled tracer...
  • Loading branch information
ryoqun authored Jan 25, 2023
1 parent 7d28696 commit 40bbf99
Show file tree
Hide file tree
Showing 26 changed files with 1,276 additions and 148 deletions.
74 changes: 74 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 25 additions & 5 deletions banking-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ use {
rand::{thread_rng, Rng},
rayon::prelude::*,
solana_client::connection_cache::ConnectionCache,
solana_core::banking_stage::BankingStage,
solana_core::{
banking_stage::BankingStage,
banking_trace::{BankingPacketBatch, BankingTracer, BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT},
},
solana_gossip::cluster_info::{ClusterInfo, Node},
solana_ledger::{
blockstore::Blockstore,
Expand Down Expand Up @@ -255,6 +258,12 @@ fn main() {
.takes_value(false)
.help("Skip transaction sanity execution"),
)
.arg(
Arg::new("trace_banking")
.long("trace-banking")
.takes_value(false)
.help("Enable banking tracing"),
)
.arg(
Arg::new("write_lock_contention")
.long("write-lock-contention")
Expand Down Expand Up @@ -407,9 +416,17 @@ fn main() {
let leader_schedule_cache = Arc::new(LeaderScheduleCache::new_from_bank(&bank));
let (exit, poh_recorder, poh_service, signal_receiver) =
create_test_recorder(&bank, &blockstore, None, Some(leader_schedule_cache));
let (non_vote_sender, non_vote_receiver) = unbounded();
let (tpu_vote_sender, tpu_vote_receiver) = unbounded();
let (gossip_vote_sender, gossip_vote_receiver) = unbounded();
let (banking_tracer, tracer_thread) =
BankingTracer::new(matches.is_present("trace_banking").then_some((
&blockstore.banking_trace_path(),
exit.clone(),
BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT,
)))
.unwrap();
let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote();
let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote();
let (gossip_vote_sender, gossip_vote_receiver) =
banking_tracer.create_channel_gossip_vote();
let cluster_info = {
let keypair = Arc::new(Keypair::new());
let node = Node::new_localhost_with_pubkey(&keypair.pubkey());
Expand Down Expand Up @@ -462,7 +479,7 @@ fn main() {
timestamp(),
);
non_vote_sender
.send((vec![packet_batch.clone()], None))
.send(BankingPacketBatch::new((vec![packet_batch.clone()], None)))
.unwrap();
}

Expand Down Expand Up @@ -583,6 +600,9 @@ fn main() {
poh_service.join().unwrap();
sleep(Duration::from_secs(1));
debug!("waited for poh_service");
if let Some(tracer_thread) = tracer_thread {
tracer_thread.join().unwrap().unwrap();
}
}
let _unused = Blockstore::destroy(&ledger_path);
}
1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ num_enum = "0.5.7"
rand = "0.7.0"
rand_chacha = "0.2.2"
rayon = "1.5.3"
rolling-file = "0.2.0"
serde = "1.0.144"
serde_derive = "1.0.103"
solana-address-lookup-table-program = { path = "../programs/address-lookup-table", version = "=1.15.0" }
Expand Down
22 changes: 16 additions & 6 deletions core/benches/banking_stage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use {
solana_client::connection_cache::ConnectionCache,
solana_core::{
banking_stage::{BankingStage, BankingStageStats},
banking_trace::{BankingPacketBatch, BankingTracer},
leader_slot_banking_stage_metrics::LeaderSlotMetricsTracker,
qos_service::QosService,
unprocessed_packet_batches::*,
Expand Down Expand Up @@ -197,9 +198,10 @@ fn bench_banking(bencher: &mut Bencher, tx_type: TransactionType) {
// during the benchmark
genesis_config.ticks_per_slot = 10_000;

let (non_vote_sender, non_vote_receiver) = unbounded();
let (tpu_vote_sender, tpu_vote_receiver) = unbounded();
let (gossip_vote_sender, gossip_vote_receiver) = unbounded();
let banking_tracer = BankingTracer::new_disabled();
let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote();
let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote();
let (gossip_vote_sender, gossip_vote_receiver) = banking_tracer.create_channel_gossip_vote();

let mut bank = Bank::new_for_benches(&genesis_config);
// Allow arbitrary transaction processing time for the purposes of this bench
Expand Down Expand Up @@ -304,10 +306,16 @@ fn bench_banking(bencher: &mut Bencher, tx_type: TransactionType) {
let mut sent = 0;
if let Some(vote_packets) = &vote_packets {
tpu_vote_sender
.send((vote_packets[start..start + chunk_len].to_vec(), None))
.send(BankingPacketBatch::new((
vote_packets[start..start + chunk_len].to_vec(),
None,
)))
.unwrap();
gossip_vote_sender
.send((vote_packets[start..start + chunk_len].to_vec(), None))
.send(BankingPacketBatch::new((
vote_packets[start..start + chunk_len].to_vec(),
None,
)))
.unwrap();
}
for v in verified[start..start + chunk_len].chunks(chunk_len / num_threads) {
Expand All @@ -321,7 +329,9 @@ fn bench_banking(bencher: &mut Bencher, tx_type: TransactionType) {
for xv in v {
sent += xv.len();
}
non_vote_sender.send((v.to_vec(), None)).unwrap();
non_vote_sender
.send(BankingPacketBatch::new((v.to_vec(), None)))
.unwrap();
}

check_txs(&signal_receiver2, txes / CHUNKS);
Expand Down
Loading

0 comments on commit 40bbf99

Please sign in to comment.