[Forge] Add working_dir param to support running node on checkpoint dir, so that the existing data on disk is preserved. (aptos-labs#4591)
grao1991 authored Dec 7, 2022
1 parent 1118252 commit 1d45ed0
Showing 9 changed files with 104 additions and 15 deletions.
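In effect, a Forge test can restart a node against a checkpoint copy of its databases instead of the originals. A rough sketch of the intended use (paths are illustrative; assumes NodeConfig implements Default, as the serde(default) config structs in aptos-config do):

use aptos_config::config::NodeConfig;
use std::path::PathBuf;

fn main() {
    let mut node_config = NodeConfig::default();
    node_config.set_data_dir(PathBuf::from("/opt/aptos/data")); // existing data, left untouched
    // When set, setup_environment() checkpoints consensus_db, state_sync_db,
    // ledger_db and state_merkle_db under this directory and runs the node there.
    node_config.base.working_dir = Some(PathBuf::from("/opt/aptos/checkpoint"));
}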
52 changes: 48 additions & 4 deletions aptos-node/src/lib.rs
@@ -8,11 +8,11 @@ mod log_build_information;
use anyhow::{anyhow, Context};
use aptos_api::bootstrap as bootstrap_api;
use aptos_build_info::build_information;
use aptos_config::config::StateSyncConfig;
use aptos_config::{
config::{
AptosDataClientConfig, BaseConfig, NetworkConfig, NodeConfig, PersistableConfig,
StorageServiceConfig,
RocksdbConfigs, StateSyncConfig, StorageServiceConfig, BUFFERED_STATE_TARGET_ITEMS,
DEFAULT_MAX_NUM_NODES_PER_LRU_CACHE_SHARD, NO_OP_STORAGE_PRUNER_CONFIG,
},
network_id::NetworkId,
utils::get_genesis_txn,
@@ -26,6 +26,7 @@ use aptos_types::{
account_config::CORE_CODE_ADDRESS, account_view::AccountView, chain_id::ChainId,
on_chain_config::ON_CHAIN_CONFIG_REGISTRY, waypoint::Waypoint,
};

use aptos_vm::AptosVM;
use aptosdb::AptosDB;
use backup_service::start_backup_service;
@@ -53,8 +54,9 @@ use state_sync_driver::{
use std::{
boxed::Box,
collections::{HashMap, HashSet},
fs,
io::Write,
path::PathBuf,
path::{Path, PathBuf},
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering},
Arc,
@@ -569,8 +571,43 @@ fn bootstrap_indexer(
Ok(None)
}

fn create_checkpoint_and_change_working_dir(
node_config: &mut NodeConfig,
working_dir: impl AsRef<Path>,
) {
let source_dir = node_config.storage.dir();
node_config.set_data_dir(working_dir.as_ref().to_path_buf());
let checkpoint_dir = node_config.storage.dir();

assert!(source_dir != checkpoint_dir);

// Create rocksdb checkpoint.
fs::create_dir_all(&checkpoint_dir).unwrap();

AptosDB::open(
&source_dir,
false, /* readonly */
NO_OP_STORAGE_PRUNER_CONFIG, /* pruner */
RocksdbConfigs::default(),
false,
BUFFERED_STATE_TARGET_ITEMS,
DEFAULT_MAX_NUM_NODES_PER_LRU_CACHE_SHARD,
)
.expect("AptosDB open failure.")
.create_checkpoint(&checkpoint_dir)
.expect("AptosDB checkpoint creation failed.");

consensus::create_checkpoint(&source_dir, &checkpoint_dir)
.expect("ConsensusDB checkpoint creation failed.");
let state_sync_db =
state_sync_driver::metadata_storage::PersistentMetadataStorage::new(&source_dir);
state_sync_db
.create_checkpoint(&checkpoint_dir)
.expect("StateSyncDB checkpoint creation failed.");
}

pub fn setup_environment(
node_config: NodeConfig,
mut node_config: NodeConfig,
remote_log_rx: Option<mpsc::Receiver<TelemetryLog>>,
logger_filter_update_job: Option<LoggerFilterUpdater>,
) -> anyhow::Result<AptosHandle> {
@@ -580,6 +617,13 @@ pub fn setup_environment(
inspection_service::inspection_service::start_inspection_service(node_config_clone)
});

// If working_dir is provided, create RocksDB checkpoints of consensus_db,
// state_sync_db, ledger_db and state_merkle_db under the working dir and run the node
// on those copies, so that the existing data on disk is not changed. For now this is a test-only feature.
if let Some(working_dir) = node_config.base.working_dir.clone() {
create_checkpoint_and_change_working_dir(&mut node_config, working_dir);
}

// Open the database
let mut instant = Instant::now();
let (aptos_db, db_rw) = DbReaderWriter::wrap(
9 changes: 9 additions & 0 deletions config/src/config/mod.rs
@@ -101,6 +101,7 @@ pub struct NodeConfig {
#[serde(default, deny_unknown_fields)]
pub struct BaseConfig {
pub data_dir: PathBuf,
pub working_dir: Option<PathBuf>,
pub role: RoleType,
pub waypoint: WaypointConfig,
}
@@ -109,6 +110,7 @@ impl Default for BaseConfig {
fn default() -> BaseConfig {
BaseConfig {
data_dir: PathBuf::from("/opt/aptos/data"),
working_dir: None,
role: RoleType::Validator,
waypoint: WaypointConfig::None,
}
@@ -266,6 +268,13 @@ impl NodeConfig {
&self.base.data_dir
}

pub fn working_dir(&self) -> &Path {
match &self.base.working_dir {
Some(working_dir) => working_dir,
None => &self.base.data_dir,
}
}

pub fn set_data_dir(&mut self, data_dir: PathBuf) {
self.base.data_dir = data_dir.clone();
self.consensus.set_data_dir(data_dir.clone());
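For context, a minimal sketch of the fallback behavior of the working_dir() accessor added above (assumes NodeConfig::default(); paths are illustrative):

use aptos_config::config::NodeConfig;
use std::path::{Path, PathBuf};

fn main() {
    let mut config = NodeConfig::default();
    // With working_dir unset, working_dir() falls back to data_dir.
    assert_eq!(config.working_dir(), config.base.data_dir.as_path());

    config.base.working_dir = Some(PathBuf::from("/opt/aptos/checkpoint"));
    assert_eq!(config.working_dir(), Path::new("/opt/aptos/checkpoint"));
}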
16 changes: 16 additions & 0 deletions consensus/src/consensusdb/mod.rs
@@ -24,6 +24,22 @@ use std::{collections::HashMap, iter::Iterator, path::Path, time::Instant};
/// The name of the consensus db file
pub const CONSENSUS_DB_NAME: &str = "consensus_db";

/// Creates a new physical DB checkpoint in the directory specified by `checkpoint_path`.
pub fn create_checkpoint<P: AsRef<Path> + Clone>(db_path: P, checkpoint_path: P) -> Result<()> {
let start = Instant::now();
let consensus_db_checkpoint_path = checkpoint_path.as_ref().join(CONSENSUS_DB_NAME);
std::fs::remove_dir_all(&consensus_db_checkpoint_path).unwrap_or(());
ConsensusDB::new(db_path)
.db
.create_checkpoint(&consensus_db_checkpoint_path)?;
info!(
path = consensus_db_checkpoint_path,
time_ms = %start.elapsed().as_millis(),
"Made ConsensusDB checkpoint."
);
Ok(())
}

pub struct ConsensusDB {
db: DB,
}
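A minimal call sketch for the helper added above (assumes the consensus crate as a dependency; paths are illustrative, error handling reduced to expect):

fn main() {
    // Opens the ConsensusDB under the source storage dir and writes a RocksDB
    // checkpoint to <checkpoint_path>/consensus_db, removing any stale copy first.
    consensus::create_checkpoint("/opt/aptos/data/db", "/opt/aptos/checkpoint/db")
        .expect("ConsensusDB checkpoint creation failed.");
}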
2 changes: 2 additions & 0 deletions consensus/src/lib.rs
@@ -51,6 +51,8 @@ pub mod network_interface;
/// Required by the smoke tests
pub use consensusdb::CONSENSUS_DB_NAME;

pub use consensusdb::create_checkpoint;

#[cfg(feature = "fuzzing")]
pub use round_manager::round_manager_fuzzing;

2 changes: 1 addition & 1 deletion docker/compose/aptos-node/validator.yaml
@@ -16,7 +16,7 @@ consensus:
type: "local"
backend:
type: "on_disk_storage"
path: /opt/aptos/data/secure-data.json
path: secure-data.json
namespace: ~
initial_safety_rules_config:
from_file:
14 changes: 14 additions & 0 deletions state-sync/state-sync-v2/state-sync-driver/src/metadata_storage.rs
@@ -161,6 +161,20 @@ impl PersistentMetadataStorage {
))
})
}

/// Creates a new physical DB checkpoint in the directory specified by `path`.
pub fn create_checkpoint<P: AsRef<Path>>(&self, path: P) -> Result<()> {
let start = Instant::now();
let state_sync_db_path = path.as_ref().join(STATE_SYNC_DB_NAME);
std::fs::remove_dir_all(&state_sync_db_path).unwrap_or(());
self.database.create_checkpoint(&state_sync_db_path)?;
info!(
path = state_sync_db_path,
time_ms = %start.elapsed().as_millis(),
"Made StateSyncDB checkpoint."
);
Ok(())
}
}

impl MetadataStorageInterface for PersistentMetadataStorage {
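A minimal call sketch for the new create_checkpoint method above (the exact signature of PersistentMetadataStorage::new is assumed to accept a path, as in the aptos-node change; paths are illustrative):

use state_sync_driver::metadata_storage::PersistentMetadataStorage;
use std::path::Path;

fn main() {
    // Opens the state sync metadata DB under the source storage dir and writes a
    // checkpoint to <path>/state_sync_db, removing any stale copy first.
    let metadata_storage = PersistentMetadataStorage::new(Path::new("/opt/aptos/data/db"));
    metadata_storage
        .create_checkpoint("/opt/aptos/checkpoint/db")
        .expect("StateSyncDB checkpoint creation failed.");
}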
20 changes: 12 additions & 8 deletions storage/aptosdb/src/lib.rs
@@ -65,8 +65,6 @@ use aptos_crypto::hash::HashValue;
use aptos_infallible::Mutex;
use aptos_logger::prelude::*;
use aptos_rocksdb_options::gen_rocksdb_options;
use aptos_types::proof::TransactionAccumulatorSummary;
use aptos_types::state_store::state_storage_usage::StateStorageUsage;
use aptos_types::{
account_address::AccountAddress,
account_config::{new_block_event_key, NewBlockEvent},
@@ -77,12 +75,13 @@ use aptos_types::{
ledger_info::LedgerInfoWithSignatures,
proof::{
accumulator::InMemoryAccumulator, AccumulatorConsistencyProof, SparseMerkleProofExt,
TransactionInfoListWithProof,
TransactionAccumulatorSummary, TransactionInfoListWithProof,
},
state_proof::StateProof,
state_store::{
state_key::StateKey,
state_key_prefix::StateKeyPrefix,
state_storage_usage::StateStorageUsage,
state_value::{StateValue, StateValueChunkWithProof},
table::{TableHandle, TableInfo},
},
@@ -108,12 +107,15 @@ use std::{
time::{Duration, Instant},
};

use crate::pruner::{
ledger_pruner_manager::LedgerPrunerManager, ledger_store::ledger_store_pruner::LedgerPruner,
state_pruner_manager::StatePrunerManager, state_store::StateMerklePruner,
use crate::{
pruner::{
ledger_pruner_manager::LedgerPrunerManager,
ledger_store::ledger_store_pruner::LedgerPruner, state_pruner_manager::StatePrunerManager,
state_store::StateMerklePruner,
},
stale_node_index::StaleNodeIndexSchema,
stale_node_index_cross_epoch::StaleNodeIndexCrossEpochSchema,
};
use crate::stale_node_index::StaleNodeIndexSchema;
use crate::stale_node_index_cross_epoch::StaleNodeIndexCrossEpochSchema;
use storage_interface::{
state_delta::StateDelta, state_view::DbStateView, DbReader, DbWriter, ExecutedTrees, Order,
StateSnapshotReceiver, MAX_REQUEST_LIMIT,
@@ -655,6 +657,8 @@ impl AptosDB {
let start = Instant::now();
let ledger_db_path = path.as_ref().join(LEDGER_DB_NAME);
let state_merkle_db_path = path.as_ref().join(STATE_MERKLE_DB_NAME);
std::fs::remove_dir_all(&ledger_db_path).unwrap_or(());
std::fs::remove_dir_all(&state_merkle_db_path).unwrap_or(());
self.ledger_db.create_checkpoint(&ledger_db_path)?;
self.state_merkle_db
.create_checkpoint(&state_merkle_db_path)?;
@@ -19,7 +19,7 @@ consensus:
type: "local"
backend:
type: "on_disk_storage"
path: /opt/aptos/data/secure-data.json
path: secure-data.json
namespace: ~
initial_safety_rules_config:
from_file:
2 changes: 1 addition & 1 deletion testsuite/forge/src/backend/local/node.rs
@@ -282,7 +282,7 @@ impl Node for LocalNode {
let node_config = self.config();
let ledger_db_path = node_config.storage.dir().join(LEDGER_DB_NAME);
let state_db_path = node_config.storage.dir().join(STATE_MERKLE_DB_NAME);
let secure_storage_path = node_config.base.data_dir.join("secure_storage.json");
let secure_storage_path = node_config.working_dir().join("secure_storage.json");
let state_sync_db_path = node_config.storage.dir().join(STATE_SYNC_DB_NAME);

debug!(
