// bootstrap.rs (forked from solana-labs/solana)
use {
log::*,
rand::{seq::SliceRandom, thread_rng, Rng},
solana_client::rpc_client::RpcClient,
solana_core::validator::{ValidatorConfig, ValidatorStartProgress},
solana_download_utils::{download_snapshot_archive, DownloadProgressRecord},
solana_genesis_utils::download_then_check_genesis_hash,
solana_gossip::{
cluster_info::{ClusterInfo, Node},
contact_info::ContactInfo,
crds_value,
gossip_service::GossipService,
},
solana_runtime::{
snapshot_archive_info::SnapshotArchiveInfoGetter,
snapshot_package::SnapshotType,
snapshot_utils::{
self, DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
},
},
solana_sdk::{
clock::Slot,
commitment_config::CommitmentConfig,
hash::Hash,
pubkey::Pubkey,
signature::{Keypair, Signer},
},
solana_streamer::socket::SocketAddrSpace,
std::{
collections::{HashMap, HashSet},
net::{SocketAddr, TcpListener, UdpSocket},
path::Path,
process::exit,
sync::{
atomic::{AtomicBool, Ordering},
Arc, RwLock,
},
thread::sleep,
time::{Duration, Instant},
},
};
#[derive(Debug)]
pub struct RpcBootstrapConfig {
pub no_genesis_fetch: bool,
pub no_snapshot_fetch: bool,
pub only_known_rpc: bool,
pub max_genesis_archive_unpacked_size: u64,
pub check_vote_account: Option<String>,
pub incremental_snapshot_fetch: bool,
}
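// Illustrative only, not part of the original source: a hedged sketch of
// populating this config. The field values below are assumptions chosen for
// the example, not defaults defined by this crate.
//
// let bootstrap_config = RpcBootstrapConfig {
//     no_genesis_fetch: false,
//     no_snapshot_fetch: false,
//     only_known_rpc: true,
//     max_genesis_archive_unpacked_size: 10 * 1024 * 1024, // hypothetical limit
//     check_vote_account: Some("http://127.0.0.1:8899".to_string()),
//     incremental_snapshot_fetch: true,
// };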
fn verify_reachable_ports(
node: &Node,
cluster_entrypoint: &ContactInfo,
validator_config: &ValidatorConfig,
socket_addr_space: &SocketAddrSpace,
) -> bool {
let mut udp_sockets = vec![&node.sockets.gossip, &node.sockets.repair];
if ContactInfo::is_valid_address(&node.info.serve_repair, socket_addr_space) {
udp_sockets.push(&node.sockets.serve_repair);
}
if ContactInfo::is_valid_address(&node.info.tpu, socket_addr_space) {
udp_sockets.extend(node.sockets.tpu.iter());
udp_sockets.push(&node.sockets.tpu_quic);
}
if ContactInfo::is_valid_address(&node.info.tpu_forwards, socket_addr_space) {
udp_sockets.extend(node.sockets.tpu_forwards.iter());
udp_sockets.push(&node.sockets.tpu_forwards_quic);
}
if ContactInfo::is_valid_address(&node.info.tpu_vote, socket_addr_space) {
udp_sockets.extend(node.sockets.tpu_vote.iter());
}
if ContactInfo::is_valid_address(&node.info.tvu, socket_addr_space) {
udp_sockets.extend(node.sockets.tvu.iter());
udp_sockets.extend(node.sockets.broadcast.iter());
udp_sockets.extend(node.sockets.retransmit_sockets.iter());
}
if ContactInfo::is_valid_address(&node.info.tvu_forwards, socket_addr_space) {
udp_sockets.extend(node.sockets.tvu_forwards.iter());
}
let mut tcp_listeners = vec![];
if let Some((rpc_addr, rpc_pubsub_addr)) = validator_config.rpc_addrs {
for (purpose, bind_addr, public_addr) in &[
("RPC", rpc_addr, &node.info.rpc),
("RPC pubsub", rpc_pubsub_addr, &node.info.rpc_pubsub),
] {
if ContactInfo::is_valid_address(public_addr, socket_addr_space) {
tcp_listeners.push((
bind_addr.port(),
TcpListener::bind(bind_addr).unwrap_or_else(|err| {
error!(
"Unable to bind to tcp {:?} for {}: {}",
bind_addr, purpose, err
);
exit(1);
}),
));
}
}
}
if let Some(ip_echo) = &node.sockets.ip_echo {
let ip_echo = ip_echo.try_clone().expect("unable to clone tcp_listener");
tcp_listeners.push((ip_echo.local_addr().unwrap().port(), ip_echo));
}
solana_net_utils::verify_reachable_ports(
&cluster_entrypoint.gossip,
tcp_listeners,
&udp_sockets,
)
}
fn is_known_validator(id: &Pubkey, known_validators: &Option<HashSet<Pubkey>>) -> bool {
if let Some(known_validators) = known_validators {
known_validators.contains(id)
} else {
false
}
}
fn start_gossip_node(
identity_keypair: Arc<Keypair>,
cluster_entrypoints: &[ContactInfo],
ledger_path: &Path,
gossip_addr: &SocketAddr,
gossip_socket: UdpSocket,
expected_shred_version: Option<u16>,
gossip_validators: Option<HashSet<Pubkey>>,
should_check_duplicate_instance: bool,
socket_addr_space: SocketAddrSpace,
) -> (Arc<ClusterInfo>, Arc<AtomicBool>, GossipService) {
let contact_info = ClusterInfo::gossip_contact_info(
identity_keypair.pubkey(),
*gossip_addr,
expected_shred_version.unwrap_or(0),
);
let mut cluster_info = ClusterInfo::new(contact_info, identity_keypair, socket_addr_space);
cluster_info.set_entrypoints(cluster_entrypoints.to_vec());
cluster_info.restore_contact_info(ledger_path, 0);
let cluster_info = Arc::new(cluster_info);
let gossip_exit_flag = Arc::new(AtomicBool::new(false));
let gossip_service = GossipService::new(
&cluster_info,
None,
gossip_socket,
gossip_validators,
should_check_duplicate_instance,
None,
&gossip_exit_flag,
);
(cluster_info, gossip_exit_flag, gossip_service)
}
fn get_rpc_peers(
cluster_info: &ClusterInfo,
cluster_entrypoints: &[ContactInfo],
validator_config: &ValidatorConfig,
blacklisted_rpc_nodes: &mut HashSet<Pubkey>,
blacklist_timeout: &Instant,
retry_reason: &mut Option<String>,
bootstrap_config: &RpcBootstrapConfig,
) -> Option<Vec<ContactInfo>> {
let shred_version = validator_config
.expected_shred_version
.unwrap_or_else(|| cluster_info.my_shred_version());
if shred_version == 0 {
let all_zero_shred_versions = cluster_entrypoints.iter().all(|cluster_entrypoint| {
cluster_info
.lookup_contact_info_by_gossip_addr(&cluster_entrypoint.gossip)
.map_or(false, |entrypoint| entrypoint.shred_version == 0)
});
if all_zero_shred_versions {
eprintln!("Entrypoint shred version is zero. Restart with --expected-shred-version");
exit(1);
}
info!("Waiting to adopt entrypoint shred version...");
return None;
}
info!(
"Searching for an RPC service with shred version {}{}...",
shred_version,
retry_reason
.as_ref()
.map(|s| format!(" (Retrying: {})", s))
.unwrap_or_default()
);
let mut rpc_peers = cluster_info
.all_rpc_peers()
.into_iter()
.filter(|contact_info| contact_info.shred_version == shred_version)
.collect::<Vec<_>>();
if bootstrap_config.only_known_rpc {
rpc_peers.retain(|rpc_peer| {
is_known_validator(&rpc_peer.id, &validator_config.known_validators)
});
}
let rpc_peers_total = rpc_peers.len();
// Filter out blacklisted nodes
let rpc_peers: Vec<_> = rpc_peers
.into_iter()
.filter(|rpc_peer| !blacklisted_rpc_nodes.contains(&rpc_peer.id))
.collect();
let rpc_peers_blacklisted = rpc_peers_total - rpc_peers.len();
let rpc_known_peers = rpc_peers
.iter()
.filter(|rpc_peer| is_known_validator(&rpc_peer.id, &validator_config.known_validators))
.count();
info!(
"Total {} RPC nodes found. {} known, {} blacklisted ",
rpc_peers_total, rpc_known_peers, rpc_peers_blacklisted
);
if rpc_peers_blacklisted == rpc_peers_total {
*retry_reason =
if !blacklisted_rpc_nodes.is_empty() && blacklist_timeout.elapsed().as_secs() > 60 {
// If all nodes are blacklisted and no additional nodes are discovered after 60 seconds,
// remove the blacklist and try them all again
blacklisted_rpc_nodes.clear();
Some("Blacklist timeout expired".to_owned())
} else {
Some("Wait for known rpc peers".to_owned())
};
return None;
}
Some(rpc_peers)
}
fn check_vote_account(
rpc_client: &RpcClient,
identity_pubkey: &Pubkey,
vote_account_address: &Pubkey,
authorized_voter_pubkeys: &[Pubkey],
) -> Result<(), String> {
let vote_account = rpc_client
.get_account_with_commitment(vote_account_address, CommitmentConfig::confirmed())
.map_err(|err| format!("failed to fetch vote account: {}", err))?
.value
.ok_or_else(|| format!("vote account does not exist: {}", vote_account_address))?;
if vote_account.owner != solana_vote_program::id() {
return Err(format!(
"not a vote account (owned by {}): {}",
vote_account.owner, vote_account_address
));
}
let identity_account = rpc_client
.get_account_with_commitment(identity_pubkey, CommitmentConfig::confirmed())
.map_err(|err| format!("failed to fetch identity account: {}", err))?
.value
.ok_or_else(|| format!("identity account does not exist: {}", identity_pubkey))?;
let vote_state = solana_vote_program::vote_state::VoteState::from(&vote_account);
if let Some(vote_state) = vote_state {
if vote_state.authorized_voters().is_empty() {
return Err("Vote account not yet initialized".to_string());
}
if vote_state.node_pubkey != *identity_pubkey {
return Err(format!(
"vote account's identity ({}) does not match the validator's identity {}).",
vote_state.node_pubkey, identity_pubkey
));
}
for (_, vote_account_authorized_voter_pubkey) in vote_state.authorized_voters().iter() {
if !authorized_voter_pubkeys.contains(vote_account_authorized_voter_pubkey) {
return Err(format!(
"authorized voter {} not available",
vote_account_authorized_voter_pubkey
));
}
}
} else {
return Err(format!(
"invalid vote account data for {}",
vote_account_address
));
}
    // Maybe we can calculate the minimum voting fee here, rather than assuming 1 lamport
if identity_account.lamports <= 1 {
return Err(format!(
"underfunded identity account ({}): only {} lamports available",
identity_pubkey, identity_account.lamports
));
}
Ok(())
}
/// Struct to wrap the return value from get_rpc_node(). The `rpc_contact_info` is the peer to
/// download from, and `snapshot_hash` is the (optional) snapshot to download: a full snapshot
/// hash plus an (optional) incremental snapshot hash.
#[derive(Debug)]
struct GetRpcNodeResult {
rpc_contact_info: ContactInfo,
snapshot_hash: Option<SnapshotHash>,
}
/// Struct to wrap the peers & snapshot hashes together.
#[derive(Debug, PartialEq, Eq, Clone)]
struct PeerSnapshotHash {
rpc_contact_info: ContactInfo,
snapshot_hash: SnapshotHash,
}
/// A snapshot hash. In this context (bootstrap *with* incremental snapshots), a snapshot hash
/// is _both_ a full snapshot hash and an (optional) incremental snapshot hash.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
struct SnapshotHash {
full: (Slot, Hash),
incr: Option<(Slot, Hash)>,
}
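// Note on ordering (a sketch based on Rust's derive semantics, not original
// source): the derived `Ord` compares `full` first, then `incr`, and for the
// `Option` field `None < Some(_)`. This is what lets callers below pick the
// "highest" snapshot hash with `.max()`. For example:
//
// let a = SnapshotHash { full: (100, Hash::default()), incr: None };
// let b = SnapshotHash { full: (100, Hash::default()), incr: Some((150, Hash::default())) };
// assert!(a < b); // same full snapshot, but `b` also has an incremental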
#[allow(clippy::too_many_arguments)]
pub fn rpc_bootstrap(
node: &Node,
identity_keypair: &Arc<Keypair>,
ledger_path: &Path,
full_snapshot_archives_dir: &Path,
incremental_snapshot_archives_dir: &Path,
vote_account: &Pubkey,
authorized_voter_keypairs: Arc<RwLock<Vec<Arc<Keypair>>>>,
cluster_entrypoints: &[ContactInfo],
validator_config: &mut ValidatorConfig,
bootstrap_config: RpcBootstrapConfig,
do_port_check: bool,
use_progress_bar: bool,
maximum_local_snapshot_age: Slot,
should_check_duplicate_instance: bool,
start_progress: &Arc<RwLock<ValidatorStartProgress>>,
minimal_snapshot_download_speed: f32,
maximum_snapshot_download_abort: u64,
socket_addr_space: SocketAddrSpace,
) {
if do_port_check {
let mut order: Vec<_> = (0..cluster_entrypoints.len()).collect();
order.shuffle(&mut thread_rng());
if order.into_iter().all(|i| {
!verify_reachable_ports(
node,
&cluster_entrypoints[i],
validator_config,
&socket_addr_space,
)
}) {
exit(1);
}
}
if bootstrap_config.no_genesis_fetch && bootstrap_config.no_snapshot_fetch {
return;
}
let mut blacklisted_rpc_nodes = HashSet::new();
let mut gossip = None;
let mut download_abort_count = 0;
loop {
if gossip.is_none() {
*start_progress.write().unwrap() = ValidatorStartProgress::SearchingForRpcService;
gossip = Some(start_gossip_node(
identity_keypair.clone(),
cluster_entrypoints,
ledger_path,
&node.info.gossip,
node.sockets.gossip.try_clone().unwrap(),
validator_config.expected_shred_version,
validator_config.gossip_validators.clone(),
should_check_duplicate_instance,
socket_addr_space,
));
}
let rpc_node_details = get_rpc_node(
&gossip.as_ref().unwrap().0,
cluster_entrypoints,
validator_config,
&mut blacklisted_rpc_nodes,
&bootstrap_config,
);
if rpc_node_details.is_none() {
return;
}
let GetRpcNodeResult {
rpc_contact_info,
snapshot_hash,
} = rpc_node_details.unwrap();
info!(
"Using RPC service from node {}: {:?}",
rpc_contact_info.id, rpc_contact_info.rpc
);
let rpc_client = RpcClient::new_socket(rpc_contact_info.rpc);
let result = match rpc_client.get_version() {
Ok(rpc_version) => {
info!("RPC node version: {}", rpc_version.solana_core);
Ok(())
}
Err(err) => Err(format!("Failed to get RPC node version: {}", err)),
}
.and_then(|_| {
let genesis_config = download_then_check_genesis_hash(
&rpc_contact_info.rpc,
ledger_path,
validator_config.expected_genesis_hash,
bootstrap_config.max_genesis_archive_unpacked_size,
bootstrap_config.no_genesis_fetch,
use_progress_bar,
);
if let Ok(genesis_config) = genesis_config {
let genesis_hash = genesis_config.hash();
if validator_config.expected_genesis_hash.is_none() {
info!("Expected genesis hash set to {}", genesis_hash);
validator_config.expected_genesis_hash = Some(genesis_hash);
}
}
if let Some(expected_genesis_hash) = validator_config.expected_genesis_hash {
// Sanity check that the RPC node is using the expected genesis hash before
// downloading a snapshot from it
let rpc_genesis_hash = rpc_client
.get_genesis_hash()
.map_err(|err| format!("Failed to get genesis hash: {}", err))?;
if expected_genesis_hash != rpc_genesis_hash {
return Err(format!(
"Genesis hash mismatch: expected {} but RPC node genesis hash is {}",
expected_genesis_hash, rpc_genesis_hash
));
}
}
let (cluster_info, gossip_exit_flag, gossip_service) = gossip.take().unwrap();
cluster_info.save_contact_info();
gossip_exit_flag.store(true, Ordering::Relaxed);
gossip_service.join().unwrap();
let rpc_client_slot = rpc_client
.get_slot_with_commitment(CommitmentConfig::finalized())
.map_err(|err| format!("Failed to get RPC node slot: {}", err))?;
info!("RPC node root slot: {}", rpc_client_slot);
download_snapshots(
full_snapshot_archives_dir,
incremental_snapshot_archives_dir,
validator_config,
&bootstrap_config,
use_progress_bar,
maximum_local_snapshot_age,
start_progress,
minimal_snapshot_download_speed,
maximum_snapshot_download_abort,
&mut download_abort_count,
snapshot_hash,
&rpc_contact_info,
)
})
.map(|_| {
if let Some(url) = bootstrap_config.check_vote_account.as_ref() {
let rpc_client = RpcClient::new(url);
check_vote_account(
&rpc_client,
&identity_keypair.pubkey(),
vote_account,
&authorized_voter_keypairs
.read()
.unwrap()
.iter()
.map(|k| k.pubkey())
.collect::<Vec<_>>(),
)
.unwrap_or_else(|err| {
                // Consider failures here to be more likely due to user error (e.g.,
// incorrect `solana-validator` command-line arguments) rather than the
// RPC node failing.
//
// Power users can always use the `--no-check-vote-account` option to
// bypass this check entirely
error!("{}", err);
exit(1);
});
}
});
if result.is_ok() {
break;
}
warn!("{}", result.unwrap_err());
if let Some(ref known_validators) = validator_config.known_validators {
if known_validators.contains(&rpc_contact_info.id) {
continue; // Never blacklist a known node
}
}
info!(
"Excluding {} as a future RPC candidate",
rpc_contact_info.id
);
blacklisted_rpc_nodes.insert(rpc_contact_info.id);
}
if let Some((cluster_info, gossip_exit_flag, gossip_service)) = gossip.take() {
cluster_info.save_contact_info();
gossip_exit_flag.store(true, Ordering::Relaxed);
gossip_service.join().unwrap();
}
}
/// Get an RPC peer node to download from.
///
/// This function finds the highest compatible snapshots from the cluster, then picks one peer
/// at random to use (return).
fn get_rpc_node(
cluster_info: &ClusterInfo,
cluster_entrypoints: &[ContactInfo],
validator_config: &ValidatorConfig,
blacklisted_rpc_nodes: &mut HashSet<Pubkey>,
bootstrap_config: &RpcBootstrapConfig,
) -> Option<GetRpcNodeResult> {
let mut blacklist_timeout = Instant::now();
let mut newer_cluster_snapshot_timeout = None;
let mut retry_reason = None;
loop {
sleep(Duration::from_secs(1));
info!("\n{}", cluster_info.rpc_info_trace());
let rpc_peers = get_rpc_peers(
cluster_info,
cluster_entrypoints,
validator_config,
blacklisted_rpc_nodes,
&blacklist_timeout,
&mut retry_reason,
bootstrap_config,
);
if rpc_peers.is_none() {
continue;
}
let rpc_peers = rpc_peers.unwrap();
blacklist_timeout = Instant::now();
if bootstrap_config.no_snapshot_fetch {
if rpc_peers.is_empty() {
retry_reason = Some("No RPC peers available.".to_owned());
continue;
} else {
let random_peer = &rpc_peers[thread_rng().gen_range(0, rpc_peers.len())];
return Some(GetRpcNodeResult {
rpc_contact_info: random_peer.clone(),
snapshot_hash: None,
});
}
}
let peer_snapshot_hashes = get_peer_snapshot_hashes(
cluster_info,
&rpc_peers,
validator_config.known_validators.as_ref(),
bootstrap_config.incremental_snapshot_fetch,
);
if peer_snapshot_hashes.is_empty() {
match newer_cluster_snapshot_timeout {
None => newer_cluster_snapshot_timeout = Some(Instant::now()),
Some(newer_cluster_snapshot_timeout) => {
if newer_cluster_snapshot_timeout.elapsed().as_secs() > 180 {
warn!("Giving up, did not get newer snapshots from the cluster.");
return None;
}
}
}
retry_reason = Some("No snapshots available".to_owned());
continue;
} else {
let rpc_peers = peer_snapshot_hashes
.iter()
.map(|peer_snapshot_hash| peer_snapshot_hash.rpc_contact_info.id)
.collect::<Vec<_>>();
let PeerSnapshotHash {
rpc_contact_info: final_rpc_contact_info,
snapshot_hash: final_snapshot_hash,
} = get_final_peer_snapshot_hash(&peer_snapshot_hashes);
info!(
"Highest available snapshot slot is {}, available from {} node{}: {:?}",
final_snapshot_hash
.incr
.map(|(slot, _hash)| slot)
.unwrap_or(final_snapshot_hash.full.0),
rpc_peers.len(),
if rpc_peers.len() > 1 { "s" } else { "" },
rpc_peers,
);
return Some(GetRpcNodeResult {
rpc_contact_info: final_rpc_contact_info,
snapshot_hash: Some(final_snapshot_hash),
});
}
}
}
/// Get the Slot and Hash of the local snapshot with the highest slot. Can be either a full
/// snapshot or an incremental snapshot.
fn get_highest_local_snapshot_hash(
full_snapshot_archives_dir: impl AsRef<Path>,
incremental_snapshot_archives_dir: impl AsRef<Path>,
incremental_snapshot_fetch: bool,
) -> Option<(Slot, Hash)> {
snapshot_utils::get_highest_full_snapshot_archive_info(full_snapshot_archives_dir).and_then(
|full_snapshot_info| {
if incremental_snapshot_fetch {
snapshot_utils::get_highest_incremental_snapshot_archive_info(
incremental_snapshot_archives_dir,
full_snapshot_info.slot(),
)
.map(|incremental_snapshot_info| {
(
incremental_snapshot_info.slot(),
*incremental_snapshot_info.hash(),
)
})
} else {
None
}
.or_else(|| Some((full_snapshot_info.slot(), *full_snapshot_info.hash())))
},
)
}
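// A worked example of the logic above (hypothetical slots, illustrative
// only): with a local full snapshot at slot 100 and local incremental
// snapshots at slots 150 and 200 based on it, this returns the incremental
// result `Some((200, hash))` when `incremental_snapshot_fetch` is true, and
// falls back to the full snapshot's `Some((100, hash))` when it is false or
// when no incremental snapshot with that base exists.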
/// Get peer snapshot hashes
///
/// The result is a vector of peers with snapshot hashes that:
/// 1. match a snapshot hash from the known validators
/// 2. have the highest incremental snapshot slot
/// 3. have the highest full snapshot slot of (2)
fn get_peer_snapshot_hashes(
cluster_info: &ClusterInfo,
rpc_peers: &[ContactInfo],
known_validators: Option<&HashSet<Pubkey>>,
incremental_snapshot_fetch: bool,
) -> Vec<PeerSnapshotHash> {
let mut peer_snapshot_hashes =
get_eligible_peer_snapshot_hashes(cluster_info, rpc_peers, incremental_snapshot_fetch);
if known_validators.is_some() {
let known_snapshot_hashes = get_snapshot_hashes_from_known_validators(
cluster_info,
known_validators,
incremental_snapshot_fetch,
);
retain_peer_snapshot_hashes_that_match_known_snapshot_hashes(
&known_snapshot_hashes,
&mut peer_snapshot_hashes,
);
}
retain_peer_snapshot_hashes_with_highest_incremental_snapshot_slot(&mut peer_snapshot_hashes);
retain_peer_snapshot_hashes_with_highest_full_snapshot_slot(&mut peer_snapshot_hashes);
peer_snapshot_hashes
}
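// A worked example of the winnowing above (hypothetical peers and slots,
// illustrative only). Suppose three peers survive the known-validator check:
//   peer A: full (100, h1), incr Some((150, h2))
//   peer B: full (100, h1), incr Some((200, h3))
//   peer C: full (110, h4), incr None
// The highest-incremental pass keeps only peer B (incr slot 200), and the
// highest-full pass then keeps B as the sole remaining candidate. Note that
// because the incremental pass runs first, a peer with a newer *full*
// snapshot but no incremental (peer C) is filtered out before full snapshot
// slots are ever compared.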
/// Map full snapshot hashes to a set of incremental snapshot hashes. Each full snapshot hash
/// is treated as the base for its set of incremental snapshot hashes.
type KnownSnapshotHashes = HashMap<(Slot, Hash), HashSet<(Slot, Hash)>>;
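// Shape sketch of this map (hypothetical values, illustrative only):
// {
//     (100, full_hash_a) => {(150, incr_hash_x), (200, incr_hash_y)},
//     (300, full_hash_b) => {}, // a full snapshot with no incrementals yet
// }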
/// Get the snapshot hashes from known validators.
///
/// The snapshot hashes are put into a map from full snapshot hash to a set of incremental
/// snapshot hashes. This map will be used as the "known snapshot hashes"; when peers are
/// queried for their individual snapshot hashes, their results will be checked against this
/// map to verify correctness.
///
/// NOTE: Only a single snapshot hash is allowed per slot. If somehow two known validators have
/// a snapshot hash with the same slot and _different_ hashes, the second will be skipped.
/// This applies to both full and incremental snapshot hashes.
fn get_snapshot_hashes_from_known_validators(
cluster_info: &ClusterInfo,
known_validators: Option<&HashSet<Pubkey>>,
incremental_snapshot_fetch: bool,
) -> KnownSnapshotHashes {
// Get the full snapshot hashes for a node from CRDS
let get_full_snapshot_hashes_for_node = |node| {
let mut full_snapshot_hashes = Vec::new();
cluster_info.get_snapshot_hash_for_node(node, |snapshot_hashes| {
full_snapshot_hashes = snapshot_hashes.clone();
});
full_snapshot_hashes
};
// Get the incremental snapshot hashes for a node from CRDS
let get_incremental_snapshot_hashes_for_node = |node| {
cluster_info
.get_incremental_snapshot_hashes_for_node(node)
.map(|hashes| (hashes.base, hashes.hashes))
};
known_validators
.map(|known_validators| {
build_known_snapshot_hashes(
known_validators,
get_full_snapshot_hashes_for_node,
get_incremental_snapshot_hashes_for_node,
incremental_snapshot_fetch,
)
})
.unwrap_or_else(|| {
trace!("No known validators, so no known snapshot hashes");
KnownSnapshotHashes::new()
})
}
/// Build the known snapshot hashes from a set of nodes.
///
/// The `get_full_snapshot_hashes_for_node` and `get_incremental_snapshot_hashes_for_node`
/// parameters are Fns that map a pubkey to its respective full and incremental snapshot
/// hashes. These parameters exist to provide a way to test the inner algorithm without
/// needing runtime information such as the ClusterInfo or ValidatorConfig.
fn build_known_snapshot_hashes<'a, F1, F2>(
nodes: impl IntoIterator<Item = &'a Pubkey>,
get_full_snapshot_hashes_for_node: F1,
get_incremental_snapshot_hashes_for_node: F2,
incremental_snapshot_fetch: bool,
) -> KnownSnapshotHashes
where
F1: Fn(&'a Pubkey) -> Vec<(Slot, Hash)>,
F2: Fn(&'a Pubkey) -> Option<((Slot, Hash), Vec<(Slot, Hash)>)>,
{
let mut known_snapshot_hashes = KnownSnapshotHashes::new();
/// Check to see if there exists another snapshot hash in the haystack with the *same* slot
    /// but a *different* hash than the needle.
fn is_any_same_slot_and_different_hash<'a>(
needle: &(Slot, Hash),
haystack: impl IntoIterator<Item = &'a (Slot, Hash)>,
) -> bool {
haystack
.into_iter()
.any(|hay| needle.0 == hay.0 && needle.1 != hay.1)
}
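    // For example (hypothetical hashes, illustrative only): with needle
    // (100, hash_a) and haystack [(100, hash_b), (200, hash_a)], this returns
    // true because of the first element: same slot 100, different hash.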
'to_next_node: for node in nodes {
// First get the full snapshot hashes for each node and add them as the keys in the
// known snapshot hashes map.
let full_snapshot_hashes = get_full_snapshot_hashes_for_node(node);
'_to_next_full_snapshot: for full_snapshot_hash in &full_snapshot_hashes {
// Do not add this snapshot hash if there's already a full snapshot hash with the
// same slot but with a _different_ hash.
// NOTE: Nodes should not produce snapshots at the same slot with _different_
// hashes. So if it happens, keep the first and ignore the rest.
if is_any_same_slot_and_different_hash(full_snapshot_hash, known_snapshot_hashes.keys())
{
warn!(
"Ignoring all snapshot hashes from node {} since we've seen a different full snapshot hash with this slot.\nfull snapshot hash: {:?}",
node,
full_snapshot_hash,
);
debug!(
"known full snapshot hashes: {:#?}",
known_snapshot_hashes.keys(),
);
continue 'to_next_node;
}
// Insert a new full snapshot hash into the known snapshot hashes IFF an entry
// doesn't already exist. This is to ensure we don't overwrite existing
// incremental snapshot hashes that may be present for this full snapshot hash.
let _ = known_snapshot_hashes
.entry(*full_snapshot_hash)
.or_default();
}
if incremental_snapshot_fetch {
// Then get the incremental snapshot hashes for each node and add them as the values in the
// known snapshot hashes map.
if let Some((base_snapshot_hash, incremental_snapshot_hashes)) =
get_incremental_snapshot_hashes_for_node(node)
{
// Incremental snapshots must be based off a valid full snapshot. Ensure the node
// has a full snapshot hash that matches its base snapshot hash.
if !full_snapshot_hashes.contains(&base_snapshot_hash) {
warn!(
"Ignoring all incremental snapshot hashes from node {} since its base snapshot hash does not match any of its full snapshot hashes.\nbase snapshot hash: {:?}\nfull snapshot hashes: {:?}",
node,
base_snapshot_hash,
full_snapshot_hashes
);
continue 'to_next_node;
}
if let Some(known_incremental_snapshot_hashes) =
known_snapshot_hashes.get_mut(&base_snapshot_hash)
{
'to_next_incremental_snapshot: for incremental_snapshot_hash in
&incremental_snapshot_hashes
{
// Do not add this snapshot hash if there's already an incremental snapshot
// hash with the same slot, but with a _different_ hash.
// NOTE: Nodes should not produce snapshots at the same slot with _different_
// hashes. So if it happens, keep the first and ignore the rest.
if is_any_same_slot_and_different_hash(
incremental_snapshot_hash,
known_incremental_snapshot_hashes.iter(),
) {
warn!(
"Ignoring incremental snapshot hash from node {} since we've seen a different incremental snapshot hash with this slot.\nbase snapshot hash: {:?}\nincremental snapshot hash: {:?}",
node,
base_snapshot_hash,
incremental_snapshot_hash,
);
debug!(
"known incremental snapshot hashes at this slot: {:#?}",
known_incremental_snapshot_hashes.iter(),
);
continue 'to_next_incremental_snapshot;
}
known_incremental_snapshot_hashes.insert(*incremental_snapshot_hash);
}
} else {
// Since incremental snapshots *must* have a valid base (i.e. full)
// snapshot, if .get() returned None, then that can only happen if there
// already is a full snapshot hash in the known snapshot hashes with the
                    // same slot but a _different_ hash. Assert that below. If the assert
// ever fails, there is a programmer bug.
assert!(
is_any_same_slot_and_different_hash(&base_snapshot_hash, known_snapshot_hashes.keys()),
"There must exist a full snapshot hash already in the known snapshot hashes with the same slot but a different hash!",
);
debug!(
"Ignoring incremental snapshot hashes from node {} since we've seen a different base snapshot hash with this slot.\nbase snapshot hash: {:?}\nknown full snapshot hashes: {:?}",
node,
base_snapshot_hash,
known_snapshot_hashes.keys(),
);
continue 'to_next_node;
}
}
}
}
trace!("known snapshot hashes: {:?}", &known_snapshot_hashes);
known_snapshot_hashes
}
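// A minimal usage sketch (hypothetical nodes and hashes, illustrative only),
// exercising the closure-based test seam described in the doc comment above:
//
// let nodes = [node_a_pubkey];
// let known = build_known_snapshot_hashes(
//     &nodes,
//     |_node| vec![(100, full_hash)],
//     |_node| Some(((100, full_hash), vec![(150, incr_hash)])),
//     true, // incremental_snapshot_fetch
// );
// // `known` now maps (100, full_hash) -> {(150, incr_hash)}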
/// Get snapshot hashes from all the eligible peers. This fn will get only one
/// snapshot hash per peer (the one with the highest slot). This may be just a full snapshot
/// hash, or a combination of a full (i.e. base) snapshot hash and an incremental snapshot hash.
fn get_eligible_peer_snapshot_hashes(
cluster_info: &ClusterInfo,
rpc_peers: &[ContactInfo],
incremental_snapshot_fetch: bool,
) -> Vec<PeerSnapshotHash> {
let mut peer_snapshot_hashes = Vec::new();
for rpc_peer in rpc_peers {
// Get this peer's highest (full) snapshot hash. We need to get these snapshot hashes
// (instead of just the IncrementalSnapshotHashes) in case the peer is either (1) not
// taking incremental snapshots, or (2) if the last snapshot taken was a full snapshot,
// which would get pushed to CRDS here (i.e. `crds_value::SnapshotHashes`) first.
let highest_snapshot_hash =
get_highest_full_snapshot_hash_for_peer(cluster_info, &rpc_peer.id).max(
if incremental_snapshot_fetch {
get_highest_incremental_snapshot_hash_for_peer(cluster_info, &rpc_peer.id)
} else {
None
},
);
if let Some(snapshot_hash) = highest_snapshot_hash {
peer_snapshot_hashes.push(PeerSnapshotHash {
rpc_contact_info: rpc_peer.clone(),
snapshot_hash,
});
};
}
trace!("peer snapshot hashes: {:?}", &peer_snapshot_hashes);
peer_snapshot_hashes
}
/// Retain the peer snapshot hashes that match a snapshot hash from the known snapshot hashes
fn retain_peer_snapshot_hashes_that_match_known_snapshot_hashes(
known_snapshot_hashes: &KnownSnapshotHashes,
peer_snapshot_hashes: &mut Vec<PeerSnapshotHash>,
) {
peer_snapshot_hashes.retain(|peer_snapshot_hash| {
known_snapshot_hashes
.get(&peer_snapshot_hash.snapshot_hash.full)
.map(|known_incremental_hashes| {
if peer_snapshot_hash.snapshot_hash.incr.is_none() {
                    // If the peer's full snapshot hash matches but the peer doesn't have any
                    // incremental snapshots, that's fine; keep 'em!
true
} else {
known_incremental_hashes
.contains(peer_snapshot_hash.snapshot_hash.incr.as_ref().unwrap())
}
})
.unwrap_or(false)
});
trace!(
"retain peer snapshot hashes that match known snapshot hashes: {:?}",
&peer_snapshot_hashes
);
}
/// Retain the peer snapshot hashes with the highest full snapshot slot
fn retain_peer_snapshot_hashes_with_highest_full_snapshot_slot(
peer_snapshot_hashes: &mut Vec<PeerSnapshotHash>,
) {
// retain the hashes with the highest full snapshot slot
// do a two-pass algorithm
// 1. find the full snapshot hash with the highest full snapshot slot
// 2. retain elems with that full snapshot hash
let mut highest_full_snapshot_hash = (Slot::MIN, Hash::default());
peer_snapshot_hashes.iter().for_each(|peer_snapshot_hash| {
if peer_snapshot_hash.snapshot_hash.full.0 > highest_full_snapshot_hash.0 {
highest_full_snapshot_hash = peer_snapshot_hash.snapshot_hash.full;
}
});
peer_snapshot_hashes.retain(|peer_snapshot_hash| {
peer_snapshot_hash.snapshot_hash.full == highest_full_snapshot_hash
});
trace!(
"retain peer snapshot hashes with highest full snapshot slot: {:?}",
&peer_snapshot_hashes
);
}
/// Retain the peer snapshot hashes with the highest incremental snapshot slot
fn retain_peer_snapshot_hashes_with_highest_incremental_snapshot_slot(
peer_snapshot_hashes: &mut Vec<PeerSnapshotHash>,
) {
let mut highest_incremental_snapshot_hash: Option<(Slot, Hash)> = None;
peer_snapshot_hashes.iter().for_each(|peer_snapshot_hash| {
if let Some(incremental_snapshot_hash) = peer_snapshot_hash.snapshot_hash.incr.as_ref() {
if highest_incremental_snapshot_hash.is_none()
|| incremental_snapshot_hash.0 > highest_incremental_snapshot_hash.unwrap().0
{
highest_incremental_snapshot_hash = Some(*incremental_snapshot_hash);
}
};
});
peer_snapshot_hashes.retain(|peer_snapshot_hash| {
peer_snapshot_hash.snapshot_hash.incr == highest_incremental_snapshot_hash
});
trace!(
"retain peer snapshot hashes with highest incremental snapshot slot: {:?}",
&peer_snapshot_hashes
);
}
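// Edge case worth noting about the retain above (illustrative example): if
// any surviving peer advertises an incremental snapshot, peers whose `incr`
// is `None` are dropped, because `None` never equals the `Some(highest)`
// value used in the comparison. E.g. peers with incr values
// [None, Some((150, h))] reduce to just the Some((150, h)) peer.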
/// Get a final peer from the remaining peer snapshot hashes. At this point all the snapshot
/// hashes should (must) be the same, and only the peers are different. Pick an element from
/// the slice at random and return it.
fn get_final_peer_snapshot_hash(peer_snapshot_hashes: &[PeerSnapshotHash]) -> PeerSnapshotHash {
assert!(!peer_snapshot_hashes.is_empty());
// pick a final rpc peer at random
let final_peer_snapshot_hash =
&peer_snapshot_hashes[thread_rng().gen_range(0, peer_snapshot_hashes.len())];
// It is a programmer bug if the assert fires! By the time this function is called, the
    // only remaining peers' `snapshot_hash` values should all be the same.
assert!(
peer_snapshot_hashes.iter().all(|peer_snapshot_hash| {
peer_snapshot_hash.snapshot_hash == final_peer_snapshot_hash.snapshot_hash
}),
"To safely pick a peer at random, all the snapshot hashes must be the same"
);
trace!("final peer snapshot hash: {:?}", final_peer_snapshot_hash);
final_peer_snapshot_hash.clone()
}
/// Check to see if we can use our local snapshots, otherwise download newer ones.
#[allow(clippy::too_many_arguments)]
fn download_snapshots(
full_snapshot_archives_dir: &Path,