From e819b5e2dab75694aff4cf9ae0d9dcf21d03b968 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Fri, 18 Nov 2022 19:03:18 +0000 Subject: [PATCH] [Authority] Track size of compiled module cache (#6039) New Prometheus collector outputting a gauge metric to track the number of modules in Move's `SyncModuleCache`. --- crates/sui-core/src/authority.rs | 5 ++ crates/sui-core/src/lib.rs | 1 + crates/sui-core/src/module_cache_gauge.rs | 63 +++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 crates/sui-core/src/module_cache_gauge.rs diff --git a/crates/sui-core/src/authority.rs b/crates/sui-core/src/authority.rs index 4be36dc920ac1..f6773a4d8c04a 100644 --- a/crates/sui-core/src/authority.rs +++ b/crates/sui-core/src/authority.rs @@ -88,6 +88,7 @@ use crate::consensus_handler::{ use crate::epoch::committee_store::CommitteeStore; use crate::epoch::reconfiguration::ReconfigState; use crate::metrics::TaskUtilizationExt; +use crate::module_cache_gauge::ModuleCacheGauge; use crate::scoped_counter; use crate::{ authority_batch::{BroadcastReceiver, BroadcastSender}, @@ -1450,6 +1451,10 @@ impl AuthorityState { checkpoint_service, }; + prometheus_registry + .register(Box::new(ModuleCacheGauge::new(&state.module_cache))) + .unwrap(); + // Process tx recovery log first, so that the batch and checkpoint recovery (below) // don't observe partially-committed txes. state diff --git a/crates/sui-core/src/lib.rs b/crates/sui-core/src/lib.rs index 2f0b2212a35a7..fb17dfd20dbe0 100644 --- a/crates/sui-core/src/lib.rs +++ b/crates/sui-core/src/lib.rs @@ -28,6 +28,7 @@ pub mod checkpoints; mod consensus_handler; pub mod consensus_validator; mod histogram; +mod module_cache_gauge; mod node_sync; mod query_helpers; mod transaction_manager; diff --git a/crates/sui-core/src/module_cache_gauge.rs b/crates/sui-core/src/module_cache_gauge.rs new file mode 100644 index 0000000000000..d92ef436a6653 --- /dev/null +++ b/crates/sui-core/src/module_cache_gauge.rs @@ -0,0 +1,63 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::{Arc, Weak}; + +use move_bytecode_utils::module_cache::SyncModuleCache; +use move_core_types::resolver::ModuleResolver; +use prometheus::{ + core::{Collector, Desc}, + labels, + proto::{Gauge, Metric, MetricFamily, MetricType}, +}; + +/// Holds the module cache to collect its size and pass it to Prometheus, for monitoring in Grafana. +pub struct ModuleCacheGauge { + desc: Desc, + module_cache: Weak>, +} + +impl ModuleCacheGauge { + pub fn new(module_cache: &Arc>) -> Self { + Self { + desc: Desc::new( + "module_cache_size".into(), + "Number of compiled move modules in the authority's cache.".into(), + /* variable_labels */ vec![], + /* const_labels */ labels! {}, + ) + .unwrap(), + module_cache: Arc::downgrade(module_cache), + } + } + + fn metric(&self) -> Option { + let cache = self.module_cache.upgrade()?; + let mut m = Metric::default(); + let mut gauge = Gauge::default(); + // NB. lossy conversion from usize to f64, to match prometheus' API. + gauge.set_value(cache.len() as f64); + m.set_gauge(gauge); + Some(m) + } +} + +impl Collector for ModuleCacheGauge { + fn desc(&self) -> Vec<&Desc> { + vec![&self.desc] + } + + fn collect(&self) -> Vec { + let mut m = MetricFamily::default(); + + m.set_name(self.desc.fq_name.clone()); + m.set_help(self.desc.help.clone()); + m.set_field_type(MetricType::GAUGE); + + if let Some(metric) = self.metric() { + m.mut_metric().push(metric); + } + + vec![m] + } +}