Skip to content

Commit

Permalink
Allow configurable pruning delay and add some metrics (MystenLabs#10127)
Browse files Browse the repository at this point in the history
## Description 

We want to support running the pruner periodically, with a configurable
delay, in the current epoch (to be used by validators). This PR allows
that delay to be set through the node config.

## Test Plan 

Existing tests.
  • Loading branch information
sadhansood authored Mar 31, 2023
1 parent 7211cd9 commit 2ec5898
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 12 deletions.
1 change: 1 addition & 0 deletions crates/sui-config/data/fullnode-template-with-path.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ authority-store-pruning-config:
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
pruning-run-delay-seconds: 60

protocol-key-pair:
path: "protocol.key"
Expand Down
1 change: 1 addition & 0 deletions crates/sui-config/data/fullnode-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ authority-store-pruning-config:
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
pruning-run-delay-seconds: 60
8 changes: 6 additions & 2 deletions crates/sui-config/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ pub struct AuthorityStorePruningConfig {
pub num_latest_epoch_dbs_to_retain: usize,
pub epoch_db_pruning_period_secs: u64,
pub num_epochs_to_retain: u64,
pub pruning_run_delay_seconds: u64,
pub max_checkpoints_in_batch: usize,
pub max_transactions_in_batch: usize,
pub use_range_deletion: bool,
Expand All @@ -296,7 +297,8 @@ impl Default for AuthorityStorePruningConfig {
Self {
num_latest_epoch_dbs_to_retain: usize::MAX,
epoch_db_pruning_period_secs: u64::MAX,
num_epochs_to_retain: 1,
num_epochs_to_retain: 0,
pruning_run_delay_seconds: 60,
max_checkpoints_in_batch: 200,
max_transactions_in_batch: 1000,
use_range_deletion: true,
Expand All @@ -309,7 +311,8 @@ impl AuthorityStorePruningConfig {
Self {
num_latest_epoch_dbs_to_retain: 3,
epoch_db_pruning_period_secs: 60 * 60,
num_epochs_to_retain: 1,
num_epochs_to_retain: 0,
pruning_run_delay_seconds: 60,
max_checkpoints_in_batch: 200,
max_transactions_in_batch: 1000,
use_range_deletion: true,
Expand All @@ -320,6 +323,7 @@ impl AuthorityStorePruningConfig {
num_latest_epoch_dbs_to_retain: 3,
epoch_db_pruning_period_secs: 60 * 60,
num_epochs_to_retain: 1,
pruning_run_delay_seconds: 60,
max_checkpoints_in_batch: 200,
max_transactions_in_batch: 1000,
use_range_deletion: true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ validator_configs:
authority-store-pruning-config:
num-latest-epoch-dbs-to-retain: 3
epoch-db-pruning-period-secs: 3600
num-epochs-to-retain: 1
num-epochs-to-retain: 0
pruning-run-delay-seconds: 60
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
Expand Down Expand Up @@ -134,7 +135,8 @@ validator_configs:
authority-store-pruning-config:
num-latest-epoch-dbs-to-retain: 3
epoch-db-pruning-period-secs: 3600
num-epochs-to-retain: 1
num-epochs-to-retain: 0
pruning-run-delay-seconds: 60
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
Expand Down Expand Up @@ -205,7 +207,8 @@ validator_configs:
authority-store-pruning-config:
num-latest-epoch-dbs-to-retain: 3
epoch-db-pruning-period-secs: 3600
num-epochs-to-retain: 1
num-epochs-to-retain: 0
pruning-run-delay-seconds: 60
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
Expand Down Expand Up @@ -276,7 +279,8 @@ validator_configs:
authority-store-pruning-config:
num-latest-epoch-dbs-to-retain: 3
epoch-db-pruning-period-secs: 3600
num-epochs-to-retain: 1
num-epochs-to-retain: 0
pruning-run-delay-seconds: 60
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
Expand Down Expand Up @@ -347,7 +351,8 @@ validator_configs:
authority-store-pruning-config:
num-latest-epoch-dbs-to-retain: 3
epoch-db-pruning-period-secs: 3600
num-epochs-to-retain: 1
num-epochs-to-retain: 0
pruning-run-delay-seconds: 60
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
Expand Down Expand Up @@ -418,7 +423,8 @@ validator_configs:
authority-store-pruning-config:
num-latest-epoch-dbs-to-retain: 3
epoch-db-pruning-period-secs: 3600
num-epochs-to-retain: 1
num-epochs-to-retain: 0
pruning-run-delay-seconds: 60
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
Expand Down Expand Up @@ -489,7 +495,8 @@ validator_configs:
authority-store-pruning-config:
num-latest-epoch-dbs-to-retain: 3
epoch-db-pruning-period-secs: 3600
num-epochs-to-retain: 1
num-epochs-to-retain: 0
pruning-run-delay-seconds: 60
max-checkpoints-in-batch: 200
max-transactions-in-batch: 1000
use-range-deletion: true
Expand Down
1 change: 1 addition & 0 deletions crates/sui-core/src/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1643,6 +1643,7 @@ impl AuthorityState {
store.objects_lock_table.clone(),
pruning_config,
epoch_store.epoch_start_state().epoch_duration_ms(),
prometheus_registry,
);
let state = Arc::new(AuthorityState {
name,
Expand Down
59 changes: 56 additions & 3 deletions crates/sui-core/src/authority/authority_store_pruner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
use crate::authority::authority_store_types::{ObjectContentDigest, StoreData, StoreObject};
use crate::checkpoints::CheckpointStore;
use mysten_metrics::monitored_scope;
use prometheus::{
register_int_counter_with_registry, register_int_gauge_with_registry, IntCounter, IntGauge,
Registry,
};
use std::cmp::{max, min};
use std::collections::HashMap;
use std::{sync::Arc, time::Duration};
Expand All @@ -26,6 +30,31 @@ pub struct AuthorityStorePruner {
_objects_pruner_cancel_handle: oneshot::Sender<()>,
}

/// Prometheus metrics published by the authority store pruner.
pub struct AuthorityStorePruningMetrics {
    /// Checkpoint sequence number most recently recorded by the pruner as pruned.
    pub last_pruned_checkpoint: IntGauge,
    /// Running total of object keys the pruner has selected for deletion.
    pub num_pruned_objects: IntCounter,
}

impl AuthorityStorePruningMetrics {
    /// Registers the pruning metrics with `registry` and returns them behind an [`Arc`]
    /// so they can be shared with the pruning task.
    ///
    /// # Panics
    ///
    /// Panics if registering either metric with `registry` fails.
    pub fn new(registry: &Registry) -> Arc<Self> {
        // Registration order is kept: the gauge first, then the counter.
        let last_pruned_checkpoint = register_int_gauge_with_registry!(
            "last_pruned_checkpoint",
            "Last pruned checkpoint",
            registry
        )
        .unwrap();
        let num_pruned_objects = register_int_counter_with_registry!(
            "num_pruned_objects",
            "Number of pruned objects",
            registry
        )
        .unwrap();

        Arc::new(Self {
            last_pruned_checkpoint,
            num_pruned_objects,
        })
    }
}

#[derive(Debug, Clone, Copy)]
enum DeletionMethod {
RangeDelete,
Expand All @@ -40,6 +69,7 @@ impl AuthorityStorePruner {
objects_lock_table: &Arc<RwLockTable<ObjectContentDigest>>,
checkpoint_number: CheckpointSequenceNumber,
deletion_method: DeletionMethod,
metrics: Arc<AuthorityStorePruningMetrics>,
) -> anyhow::Result<()> {
let _scope = monitored_scope("ObjectsLivePruner");
let mut wb = perpetual_db.objects.batch();
Expand All @@ -50,6 +80,9 @@ impl AuthorityStorePruner {
object_keys_to_prune.push(ObjectKey(*object_id, *seq_number));
}
}
metrics
.num_pruned_objects
.inc_by(object_keys_to_prune.len() as u64);
let mut indirect_objects: HashMap<_, i64> = HashMap::new();
for object in perpetual_db
.objects
Expand Down Expand Up @@ -94,6 +127,7 @@ impl AuthorityStorePruner {
wb.partial_merge_batch(&perpetual_db.indirect_move_objects, ref_count_update)?;
}
perpetual_db.set_highest_pruned_checkpoint(&mut wb, checkpoint_number)?;
metrics.last_pruned_checkpoint.set(checkpoint_number as i64);

let _locks = objects_lock_table
.acquire_locks(indirect_objects.into_keys())
Expand All @@ -108,6 +142,7 @@ impl AuthorityStorePruner {
checkpoint_store: &Arc<CheckpointStore>,
objects_lock_table: &Arc<RwLockTable<ObjectContentDigest>>,
config: AuthorityStorePruningConfig,
metrics: Arc<AuthorityStorePruningMetrics>,
) -> anyhow::Result<()> {
let deletion_method = if config.use_range_deletion {
DeletionMethod::RangeDelete
Expand Down Expand Up @@ -171,6 +206,7 @@ impl AuthorityStorePruner {
objects_lock_table,
checkpoint_number,
deletion_method,
metrics.clone(),
)
.await?;
batch_effects = vec![];
Expand All @@ -184,6 +220,7 @@ impl AuthorityStorePruner {
objects_lock_table,
checkpoint_number,
deletion_method,
metrics.clone(),
)
.await?;
}
Expand All @@ -200,6 +237,7 @@ impl AuthorityStorePruner {
perpetual_db: Arc<AuthorityPerpetualTables>,
checkpoint_store: Arc<CheckpointStore>,
objects_lock_table: Arc<RwLockTable<ObjectContentDigest>>,
metrics: Arc<AuthorityStorePruningMetrics>,
) -> Sender<()> {
let (sender, mut recv) = tokio::sync::oneshot::channel();
debug!(
Expand All @@ -209,7 +247,7 @@ impl AuthorityStorePruner {
let tick_duration = if config.num_epochs_to_retain > 0 {
Duration::from_millis(epoch_duration_ms / 2)
} else {
Duration::from_secs(1)
Duration::from_secs(config.pruning_run_delay_seconds.min(60))
};

let pruning_initial_delay = min(tick_duration, Duration::from_secs(300));
Expand All @@ -220,7 +258,7 @@ impl AuthorityStorePruner {
loop {
tokio::select! {
_ = prune_interval.tick(), if config.num_epochs_to_retain != u64::MAX => {
if let Err(err) = Self::prune_objects_for_eligible_epochs(&perpetual_db, &checkpoint_store, &objects_lock_table, config).await {
if let Err(err) = Self::prune_objects_for_eligible_epochs(&perpetual_db, &checkpoint_store, &objects_lock_table, config, metrics.clone()).await {
error!("Failed to prune objects: {:?}", err);
}
},
Expand All @@ -236,6 +274,7 @@ impl AuthorityStorePruner {
objects_lock_table: Arc<RwLockTable<ObjectContentDigest>>,
pruning_config: AuthorityStorePruningConfig,
epoch_duration_ms: u64,
registry: &Registry,
) -> Self {
AuthorityStorePruner {
_objects_pruner_cancel_handle: Self::setup_objects_pruning(
Expand All @@ -244,6 +283,7 @@ impl AuthorityStorePruner {
perpetual_db,
checkpoint_store,
objects_lock_table,
AuthorityStorePruningMetrics::new(registry),
),
}
}
Expand All @@ -258,14 +298,15 @@ mod tests {
use std::{collections::HashSet, sync::Arc};
use tracing::log::{error, info};

use crate::authority::authority_store_pruner::DeletionMethod;
use crate::authority::authority_store_pruner::{AuthorityStorePruningMetrics, DeletionMethod};
use crate::authority::authority_store_tables::AuthorityPerpetualTables;
use crate::authority::authority_store_types::{
get_store_object_pair, ObjectContentDigest, StoreData, StoreObject, StoreObjectPair,
StoreObjectWrapper,
};
#[cfg(not(target_env = "msvc"))]
use pprof::Symbol;
use prometheus::Registry;
use sui_storage::mutex_table::RwLockTable;
use sui_types::base_types::{ObjectDigest, VersionNumber};
use sui_types::messages::{TransactionEffects, TransactionEffectsAPI};
Expand Down Expand Up @@ -415,6 +456,8 @@ mod tests {
total_unique_object_ids: u32,
deletion_method: DeletionMethod,
) -> Vec<ObjectKey> {
let registry = Registry::default();
let metrics = AuthorityStorePruningMetrics::new(&registry);
let to_keep = {
let db = Arc::new(AuthorityPerpetualTables::open(path, None));
let (to_keep, to_delete) = generate_test_data(
Expand All @@ -433,6 +476,7 @@ mod tests {
&lock_table(),
0,
deletion_method,
metrics,
)
.await
.unwrap();
Expand Down Expand Up @@ -514,12 +558,15 @@ mod tests {

let mut effects = TransactionEffects::default();
*effects.modified_at_versions_mut_for_testing() = to_delete;
let registry = Registry::default();
let metrics = AuthorityStorePruningMetrics::new(&registry);
let total_pruned = AuthorityStorePruner::prune_effects(
vec![effects],
&perpetual_db,
&lock_table(),
0,
DeletionMethod::RangeDelete,
metrics,
)
.await;
info!("Total pruned keys = {:?}", total_pruned);
Expand All @@ -545,6 +592,8 @@ mod tests {
// then does a bunch of get(). We open the db with `ignore_range_delete` set to true (default mode).
// We then record a cpu profile of the `get()` calls and do not find any range fragmentation stack frame
// in it.
let registry = Registry::default();
let metrics = AuthorityStorePruningMetrics::new(&registry);
let primary_path = tempfile::tempdir()?.into_path();
let perpetual_db = Arc::new(AuthorityPerpetualTables::open(&primary_path, None));
let effects = insert_keys(&perpetual_db.objects)?;
Expand All @@ -554,6 +603,7 @@ mod tests {
&lock_table(),
0,
DeletionMethod::RangeDelete,
metrics,
)
.await?;
let guard = pprof::ProfilerGuardBuilder::default()
Expand All @@ -580,12 +630,15 @@ mod tests {
let primary_path = tempfile::tempdir()?.into_path();
let perpetual_db = Arc::new(AuthorityPerpetualTables::open(&primary_path, None));
let effects = insert_keys(&perpetual_db.objects)?;
let registry = Registry::default();
let metrics = AuthorityStorePruningMetrics::new(&registry);
AuthorityStorePruner::prune_effects(
vec![effects],
&perpetual_db,
&lock_table(),
0,
DeletionMethod::RangeDelete,
metrics,
)
.await?;
if let Ok(()) = perpetual_db.objects.flush() {
Expand Down

0 comments on commit 2ec5898

Please sign in to comment.