Skip to content

Commit

Permalink
[State Sync] Add simple metrics to the state sync driver.
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshLind authored and aptos-bot committed Apr 18, 2022
1 parent 7d285e6 commit 9f8d6e1
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 14 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions state-sync/state-sync-v2/state-sync-driver/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ edition = "2018"

[dependencies]
futures = "0.3.12"
once_cell = "1.7.2"
serde = { version = "1.0.124", default-features = false }
thiserror = "1.0.24"
tokio = { version = "1.8.1", features = ["full"] }
Expand All @@ -23,6 +24,7 @@ aptos-crypto = { path = "../../../crates/aptos-crypto" }
aptos-data-client = { path = "../../aptos-data-client" }
aptos-infallible = { path = "../../../crates/aptos-infallible" }
aptos-logger = { path = "../../../crates/aptos-logger" }
aptos-metrics = { path = "../../../crates/aptos-metrics" }
aptos-types = { path = "../../../types" }
aptos-workspace-hack = { version = "0.1", path = "../../../crates/aptos-workspace-hack" }
event-notifications = { path = "../../inter-component/event-notifications" }
Expand Down
15 changes: 15 additions & 0 deletions state-sync/state-sync-v2/state-sync-driver/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::{
driver_client::{ClientNotificationListener, DriverNotification},
error::Error,
logging::{LogEntry, LogSchema},
metrics,
notification_handlers::{
CommitNotification, CommitNotificationListener, CommittedAccounts, CommittedTransactions,
ConsensusNotificationHandler, ErrorNotification, ErrorNotificationListener,
Expand Down Expand Up @@ -246,6 +247,10 @@ impl<
consensus_commit_notification.reconfiguration_events.len()
))
);
metrics::increment_counter(
&metrics::DRIVER_COUNTERS,
metrics::DRIVER_CONSENSUS_COMMIT_NOTIFICATION,
);

// TODO(joshlind): can we get consensus to forward the events?

Expand Down Expand Up @@ -285,6 +290,10 @@ impl<
sync_notification.target, latest_synced_version,
))
);
metrics::increment_counter(
&metrics::DRIVER_COUNTERS,
metrics::DRIVER_CONSENSUS_SYNC_NOTIFICATION,
);

// Initialize a new sync request
let latest_synced_ledger_info =
Expand All @@ -298,6 +307,10 @@ impl<
fn handle_client_notification(&mut self, notification: DriverNotification) {
debug!(LogSchema::new(LogEntry::ClientNotification)
.message("Received a notify bootstrap notification from the client!"));
metrics::increment_counter(
&metrics::DRIVER_COUNTERS,
metrics::DRIVER_CLIENT_NOTIFICATION,
);

// TODO(joshlind): refactor this if the client only supports one notification type!
// Extract the bootstrap notifier channel
Expand Down Expand Up @@ -557,11 +570,13 @@ impl<
error!(LogSchema::new(LogEntry::Driver)
.error(&error)
.message("Error found when driving progress of the continuous syncer!"));
metrics::increment_counter(&metrics::CONTINUOUS_SYNCER_ERRORS, error.get_label());
}
} else if let Err(error) = self.bootstrapper.drive_progress(&global_data_summary).await {
error!(LogSchema::new(LogEntry::Driver)
.error(&error)
.message("Error found when checking the bootstrapper progress!"));
metrics::increment_counter(&metrics::BOOTSTRAPPER_ERRORS, error.get_label());
};
}
}
25 changes: 25 additions & 0 deletions state-sync/state-sync-v2/state-sync-driver/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,31 @@ pub enum Error {
UnexpectedError(String),
}

impl Error {
/// Returns a summary label for the error
pub fn get_label(&self) -> &'static str {
match self {
Error::AlreadyBootstrapped(_) => "already_boostrapped",
Error::AdvertisedDataError(_) => "advertised_data_error",
Error::BootstrapNotComplete(_) => "bootstrap_not_complete",
Error::CallbackSendFailed(_) => "callback_send_failed",
Error::CriticalDataStreamTimeout(_) => "critical_data_stream_timeout",
Error::DataStreamNotificationTimeout(_) => "data_stream_notification_timeout",
Error::EventNotificationError(_) => "event_notification_error",
Error::FullNodeConsensusNotification(_) => "full_node_consensus_notification",
Error::IntegerOverflow(_) => "integer_overflow",
Error::InvalidPayload(_) => "invalid_payload",
Error::NotifyMempoolError(_) => "notify_mempool_error",
Error::OldSyncRequest(_, _) => "old_sync_request",
Error::SenderDroppedError(_) => "sender_dropped_error",
Error::StorageError(_) => "storage_error",
Error::SyncedBeyondTarget(_, _) => "synced_beyond_target",
Error::VerificationError(_) => "verification_error",
Error::UnexpectedError(_) => "unexpected_error",
}
}
}

impl From<Canceled> for Error {
fn from(canceled: Canceled) -> Self {
Error::SenderDroppedError(canceled.to_string())
Expand Down
1 change: 1 addition & 0 deletions state-sync/state-sync-v2/state-sync-driver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mod driver_client;
pub mod driver_factory;
mod error;
mod logging;
mod metrics;
mod notification_handlers;
mod storage_synchronizer;
mod utils;
Expand Down
114 changes: 114 additions & 0 deletions state-sync/state-sync-v2/state-sync-driver/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (c) Aptos
// SPDX-License-Identifier: Apache-2.0

use aptos_metrics::{register_int_counter_vec, register_int_gauge_vec, IntCounterVec, IntGaugeVec};
use once_cell::sync::Lazy;

/// Metric label for client notifications handled by the driver
/// (recorded on `DRIVER_COUNTERS`).
pub const DRIVER_CLIENT_NOTIFICATION: &str = "driver_client_notification";
/// Metric label for consensus commit notifications handled by the driver
/// (recorded on `DRIVER_COUNTERS`).
pub const DRIVER_CONSENSUS_COMMIT_NOTIFICATION: &str = "driver_consensus_commit_notification";
/// Metric label for consensus sync notifications handled by the driver
/// (recorded on `DRIVER_COUNTERS`).
pub const DRIVER_CONSENSUS_SYNC_NOTIFICATION: &str = "driver_consensus_sync_notification";
/// Metric label for the storage synchronizer's pending data.
/// NOTE(review): presumably used with `STORAGE_SYNCHRONIZER_GAUGES` -- confirm
/// against the storage synchronizer call sites.
pub const STORAGE_SYNCHRONIZER_PENDING_DATA: &str = "storage_synchronizer_pending_data";

/// An enum of storage synchronizer operations performed by state sync.
///
/// The type is a plain, fieldless tag, so it derives the cheap standard
/// traits to be printable, copyable and comparable.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StorageSynchronizerOperations {
    /// Applied a chunk of transaction outputs.
    AppliedTransactionOutputs,
    /// Executed a chunk of transactions.
    ExecutedTransactions,
    /// Wrote a chunk of accounts to storage.
    SyncedAccounts,
    /// Wrote a chunk of transactions and outputs to storage.
    SyncedTransactions,
}

impl StorageSynchronizerOperations {
pub fn get_label(&self) -> &'static str {
match self {
StorageSynchronizerOperations::AppliedTransactionOutputs => {
"applied_transaction_outputs"
}
StorageSynchronizerOperations::ExecutedTransactions => "executed_transactions",
StorageSynchronizerOperations::SyncedAccounts => "synced_accounts",
StorageSynchronizerOperations::SyncedTransactions => "synced_transactions",
}
}
}

/// Counter for state sync bootstrapper errors, partitioned by `error_label`.
///
/// # Panics
///
/// Panics when first accessed if the counter cannot be registered.
pub static BOOTSTRAPPER_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "aptos_state_sync_bootstrapper_errors",
        "Counters related to state sync bootstrapper errors",
        &["error_label"]
    )
    // Name the metric in the panic message so a registration failure is diagnosable.
    .expect("failed to register metric: aptos_state_sync_bootstrapper_errors")
});

/// Counter for state sync continuous syncer errors, partitioned by `error_label`.
///
/// # Panics
///
/// Panics when first accessed if the counter cannot be registered.
pub static CONTINUOUS_SYNCER_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "aptos_state_sync_continuous_syncer_errors",
        "Counters related to state sync continuous syncer errors",
        &["error_label"]
    )
    // Name the metric in the panic message so a registration failure is diagnosable.
    .expect("failed to register metric: aptos_state_sync_continuous_syncer_errors")
});

/// Counters related to the state sync driver, partitioned by `label`
/// (e.g. `DRIVER_CLIENT_NOTIFICATION`).
///
/// # Panics
///
/// Panics when first accessed if the counter cannot be registered.
pub static DRIVER_COUNTERS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "aptos_state_sync_driver_counters",
        "Counters related to the state sync driver",
        &["label"]
    )
    // Name the metric in the panic message so a registration failure is diagnosable.
    .expect("failed to register metric: aptos_state_sync_driver_counters")
});

/// Counter for storage synchronizer errors, partitioned by `error_label`.
///
/// # Panics
///
/// Panics when first accessed if the counter cannot be registered.
pub static STORAGE_SYNCHRONIZER_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "aptos_state_sync_storage_synchronizer_errors",
        "Counters related to storage synchronizer errors",
        &["error_label"]
    )
    // Name the metric in the panic message so a registration failure is diagnosable.
    .expect("failed to register metric: aptos_state_sync_storage_synchronizer_errors")
});

/// Gauges related to the storage synchronizer, partitioned by `label`.
///
/// # Panics
///
/// Panics when first accessed if the gauge cannot be registered.
pub static STORAGE_SYNCHRONIZER_GAUGES: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "aptos_state_sync_storage_synchronizer_gauges",
        "Gauges related to the storage synchronizer",
        &["label"]
    )
    // Name the metric in the panic message so a registration failure is diagnosable.
    .expect("failed to register metric: aptos_state_sync_storage_synchronizer_gauges")
});

/// Gauges for the storage synchronizer operations, partitioned by
/// `storage_synchronizer_operation`.
///
/// Note: we keep this named "aptos_state_sync_version" to maintain backward
/// compatibility with the metrics used by state sync v1.
///
/// # Panics
///
/// Panics when first accessed if the gauge cannot be registered.
pub static STORAGE_SYNCHRONIZER_OPERATIONS: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "aptos_state_sync_version",
        "The versions processed by the storage synchronizer operations",
        &["storage_synchronizer_operation"]
    )
    // Name the metric in the panic message so a registration failure is diagnosable.
    .expect("failed to register metric: aptos_state_sync_version")
});

/// Adds one to the series of `counter` selected by the single label value `label`.
pub fn increment_counter(counter: &Lazy<IntCounterVec>, label: &str) {
    let label_values = [label];
    let series = counter.with_label_values(&label_values);
    series.inc();
}

/// Increments the gauge with the specific label by the given delta.
///
/// The gauge stores `i64` values, so a `delta` larger than `i64::MAX` is
/// clamped to `i64::MAX` instead of wrapping to a negative number (which a
/// plain `as i64` cast would do, turning the increment into a decrement).
pub fn increment_gauge(gauge: &Lazy<IntGaugeVec>, label: &str, delta: u64) {
    // `TryFrom` is not part of the 2018 edition prelude, so import it locally.
    use std::convert::TryFrom;

    let delta = i64::try_from(delta).unwrap_or(i64::MAX);
    gauge.with_label_values(&[label]).add(delta);
}

/// Decrements the gauge with the specific label by the given delta.
///
/// The gauge stores `i64` values, so a `delta` larger than `i64::MAX` is
/// clamped to `i64::MAX` instead of wrapping to a negative number (which a
/// plain `as i64` cast would do, turning the decrement into an increment).
pub fn decrement_gauge(gauge: &Lazy<IntGaugeVec>, label: &str, delta: u64) {
    // `TryFrom` is not part of the 2018 edition prelude, so import it locally.
    use std::convert::TryFrom;

    let delta = i64::try_from(delta).unwrap_or(i64::MAX);
    gauge.with_label_values(&[label]).sub(delta);
}

/// Sets the gauge with the specific label to the given value.
///
/// The gauge stores `i64` values, so a `value` larger than `i64::MAX` is
/// clamped to `i64::MAX` instead of wrapping to a negative number (which a
/// plain `as i64` cast would do).
pub fn set_gauge(gauge: &Lazy<IntGaugeVec>, label: &str, value: u64) {
    // `TryFrom` is not part of the 2018 edition prelude, so import it locally.
    use std::convert::TryFrom;

    let value = i64::try_from(value).unwrap_or(i64::MAX);
    gauge.with_label_values(&[label]).set(value);
}
Loading

0 comments on commit 9f8d6e1

Please sign in to comment.