Skip to content

Commit 3dad27d

Browse files
authored
Optimize connection cache eviction logic (solana-labs#24911)
* Optimize connection cache eviction logic
* add eviction count and timing to metrics
* use swap_remove
1 parent d1f8646 commit 3dad27d

File tree

1 file changed

+67
-23
lines changed

1 file changed

+67
-23
lines changed

client/src/connection_cache.rs

+67-23
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use {
1313
timing::AtomicInterval, transaction::VersionedTransaction, transport::TransportError,
1414
},
1515
std::{
16-
collections::BTreeMap,
16+
collections::HashMap,
1717
net::{IpAddr, Ipv4Addr, SocketAddr},
1818
sync::{
1919
atomic::{AtomicU64, Ordering},
@@ -35,6 +35,8 @@ pub enum Connection {
3535
struct ConnectionCacheStats {
3636
cache_hits: AtomicU64,
3737
cache_misses: AtomicU64,
38+
cache_evictions: AtomicU64,
39+
eviction_time_ms: AtomicU64,
3840
sent_packets: AtomicU64,
3941
total_batches: AtomicU64,
4042
batch_success: AtomicU64,
@@ -84,6 +86,16 @@ impl ConnectionCacheStats {
8486
self.cache_misses.swap(0, Ordering::Relaxed),
8587
i64
8688
),
89+
(
90+
"cache_evictions",
91+
self.cache_evictions.swap(0, Ordering::Relaxed),
92+
i64
93+
),
94+
(
95+
"eviction_time_ms",
96+
self.eviction_time_ms.swap(0, Ordering::Relaxed),
97+
i64
98+
),
8799
(
88100
"get_connection_ms",
89101
self.get_connection_ms.swap(0, Ordering::Relaxed),
@@ -160,7 +172,8 @@ impl ConnectionCacheStats {
160172
}
161173

162174
struct ConnectionMap {
163-
map: BTreeMap<SocketAddr, Connection>,
175+
map: HashMap<SocketAddr, Connection>,
176+
list_of_peers: Vec<SocketAddr>,
164177
stats: Arc<ConnectionCacheStats>,
165178
last_stats: AtomicInterval,
166179
use_quic: bool,
@@ -169,7 +182,8 @@ struct ConnectionMap {
169182
impl ConnectionMap {
170183
pub fn new() -> Self {
171184
Self {
172-
map: BTreeMap::new(),
185+
map: HashMap::with_capacity(MAX_CONNECTIONS),
186+
list_of_peers: vec![],
173187
stats: Arc::new(ConnectionCacheStats::default()),
174188
last_stats: AtomicInterval::default(),
175189
use_quic: false,
@@ -194,32 +208,41 @@ struct GetConnectionResult {
194208
connection: Connection,
195209
cache_hit: bool,
196210
report_stats: bool,
197-
map_timing: u64,
198-
lock_timing: u64,
211+
map_timing_ms: u64,
212+
lock_timing_ms: u64,
199213
connection_cache_stats: Arc<ConnectionCacheStats>,
200214
other_stats: Option<(Arc<ClientStats>, ConnectionStats)>,
215+
num_evictions: u64,
216+
eviction_timing_ms: u64,
201217
}
202218

203219
fn get_or_add_connection(addr: &SocketAddr) -> GetConnectionResult {
204220
let mut get_connection_map_lock_measure = Measure::start("get_connection_map_lock_measure");
205221
let map = (*CONNECTION_MAP).read().unwrap();
206222
get_connection_map_lock_measure.stop();
207223

208-
let mut lock_timing = get_connection_map_lock_measure.as_ms();
224+
let mut lock_timing_ms = get_connection_map_lock_measure.as_ms();
209225

210226
let report_stats = map
211227
.last_stats
212228
.should_update(CONNECTION_STAT_SUBMISSION_INTERVAL);
213229

214230
let mut get_connection_map_measure = Measure::start("get_connection_hit_measure");
215-
let (connection, cache_hit, connection_cache_stats, maybe_stats) = match map.map.get(addr) {
231+
let (
232+
connection,
233+
cache_hit,
234+
connection_cache_stats,
235+
maybe_stats,
236+
num_evictions,
237+
eviction_timing_ms,
238+
) = match map.map.get(addr) {
216239
Some(connection) => {
217240
let mut stats = None;
218241
// update connection stats
219242
if let Connection::Quic(conn) = connection {
220243
stats = conn.stats().map(|s| (conn.base_stats(), s));
221244
}
222-
(connection.clone(), true, map.stats.clone(), stats)
245+
(connection.clone(), true, map.stats.clone(), stats, 0, 0)
223246
}
224247
None => {
225248
let (_, send_socket) = solana_net_utils::bind_in_range(
@@ -241,20 +264,31 @@ fn get_or_add_connection(addr: &SocketAddr) -> GetConnectionResult {
241264
let mut map = (*CONNECTION_MAP).write().unwrap();
242265
get_connection_map_lock_measure.stop();
243266

244-
lock_timing = lock_timing.saturating_add(get_connection_map_lock_measure.as_ms());
267+
lock_timing_ms = lock_timing_ms.saturating_add(get_connection_map_lock_measure.as_ms());
245268

246-
// evict a connection if the map is reaching upper bounds
247-
while map.map.len() >= MAX_CONNECTIONS {
269+
// evict a connection if the cache is reaching upper bounds
270+
let mut num_evictions = 0;
271+
let mut get_connection_cache_eviction_measure =
272+
Measure::start("get_connection_cache_eviction_measure");
273+
while map.list_of_peers.len() >= MAX_CONNECTIONS {
248274
let mut rng = thread_rng();
249275
let n = rng.gen_range(0, MAX_CONNECTIONS);
250-
if let Some((nth_addr, _)) = map.map.iter().nth(n) {
251-
let nth_addr = *nth_addr;
252-
map.map.remove(&nth_addr);
253-
}
276+
let nth_addr = map.list_of_peers.swap_remove(n);
277+
map.map.remove(&nth_addr);
278+
num_evictions += 1;
254279
}
280+
get_connection_cache_eviction_measure.stop();
255281

256282
map.map.insert(*addr, connection.clone());
257-
(connection, false, map.stats.clone(), None)
283+
map.list_of_peers.push(*addr);
284+
(
285+
connection,
286+
false,
287+
map.stats.clone(),
288+
None,
289+
num_evictions,
290+
get_connection_cache_eviction_measure.as_ms(),
291+
)
258292
}
259293
};
260294
get_connection_map_measure.stop();
@@ -263,10 +297,12 @@ fn get_or_add_connection(addr: &SocketAddr) -> GetConnectionResult {
263297
connection,
264298
cache_hit,
265299
report_stats,
266-
map_timing: get_connection_map_measure.as_ms(),
267-
lock_timing,
300+
map_timing_ms: get_connection_map_measure.as_ms(),
301+
lock_timing_ms,
268302
connection_cache_stats,
269303
other_stats: maybe_stats,
304+
num_evictions,
305+
eviction_timing_ms,
270306
}
271307
}
272308

@@ -278,10 +314,12 @@ fn get_connection(addr: &SocketAddr) -> (Connection, Arc<ConnectionCacheStats>)
278314
connection,
279315
cache_hit,
280316
report_stats,
281-
map_timing,
282-
lock_timing,
317+
map_timing_ms,
318+
lock_timing_ms,
283319
connection_cache_stats,
284320
other_stats,
321+
num_evictions,
322+
eviction_timing_ms,
285323
} = get_or_add_connection(addr);
286324

287325
if report_stats {
@@ -325,20 +363,26 @@ fn get_connection(addr: &SocketAddr) -> (Connection, Arc<ConnectionCacheStats>)
325363
.fetch_add(1, Ordering::Relaxed);
326364
connection_cache_stats
327365
.get_connection_hit_ms
328-
.fetch_add(map_timing, Ordering::Relaxed);
366+
.fetch_add(map_timing_ms, Ordering::Relaxed);
329367
} else {
330368
connection_cache_stats
331369
.cache_misses
332370
.fetch_add(1, Ordering::Relaxed);
333371
connection_cache_stats
334372
.get_connection_miss_ms
335-
.fetch_add(map_timing, Ordering::Relaxed);
373+
.fetch_add(map_timing_ms, Ordering::Relaxed);
374+
connection_cache_stats
375+
.cache_evictions
376+
.fetch_add(num_evictions, Ordering::Relaxed);
377+
connection_cache_stats
378+
.eviction_time_ms
379+
.fetch_add(eviction_timing_ms, Ordering::Relaxed);
336380
}
337381

338382
get_connection_measure.stop();
339383
connection_cache_stats
340384
.get_connection_lock_ms
341-
.fetch_add(lock_timing, Ordering::Relaxed);
385+
.fetch_add(lock_timing_ms, Ordering::Relaxed);
342386
connection_cache_stats
343387
.get_connection_ms
344388
.fetch_add(get_connection_measure.as_ms(), Ordering::Relaxed);

0 commit comments

Comments
 (0)