Skip to content

Commit

Permalink
(LedgerStore) Rate-limit RocksDB perf sample by a minimum time interv…
Browse files Browse the repository at this point in the history
…al (solana-labs#25100)

#### Problem
The current RocksDB read/write perf metrics do not include the total operation nanos
and thus we have to include all fields that might contribute to the total operation nanos.

#### Summary of Changes
This PR includes the total operation nanos in RocksDB's read/write perf and reduces the
number of reported fields in its perf metric.
  • Loading branch information
yhchiang-sol authored May 21, 2022
1 parent d121a06 commit d3dc2db
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 51 deletions.
73 changes: 50 additions & 23 deletions ledger/src/blockstore_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -542,17 +542,20 @@ impl Rocks {
}

fn write(&self, batch: RWriteBatch) -> Result<()> {
let is_perf_enabled = maybe_enable_rocksdb_perf(
let op_start_instant = maybe_enable_rocksdb_perf(
self.column_options.rocks_perf_sample_interval,
&self.write_batch_perf_status,
);
let result = self.db.write(batch);
if is_perf_enabled {
report_rocksdb_write_perf(rocksdb_metric_header!(
"blockstore_rocksdb_write_perf,op=write_batch",
"write_batch",
self.column_options
));
if let Some(op_start_instant) = op_start_instant {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
rocksdb_metric_header!(
"blockstore_rocksdb_write_perf,op=write_batch",
"write_batch",
self.column_options
),
);
}
match result {
Ok(_) => Ok(()),
Expand Down Expand Up @@ -1321,8 +1324,11 @@ where
&self.read_perf_status,
);
let result = self.backend.get_cf(self.handle(), &C::key(key));
if is_perf_enabled {
report_rocksdb_read_perf(C::rocksdb_get_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_read_perf(
&op_start_instant.elapsed(),
C::rocksdb_get_perf_metric_header(&self.column_options),
);
}
result
}
Expand Down Expand Up @@ -1399,8 +1405,11 @@ where
&self.write_perf_status,
);
let result = self.backend.put_cf(self.handle(), &C::key(key), value);
if is_perf_enabled {
report_rocksdb_write_perf(C::rocksdb_put_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
C::rocksdb_put_perf_metric_header(&self.column_options),
);
}
result
}
Expand Down Expand Up @@ -1431,8 +1440,11 @@ where
result = Ok(Some(value))
}

if is_perf_enabled {
report_rocksdb_read_perf(C::rocksdb_get_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_read_perf(
&op_start_instant.elapsed(),
C::rocksdb_get_perf_metric_header(&self.column_options),
);
}
result
}
Expand All @@ -1448,8 +1460,11 @@ where
.backend
.put_cf(self.handle(), &C::key(key), &serialized_value);

if is_perf_enabled {
report_rocksdb_write_perf(C::rocksdb_put_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
C::rocksdb_put_perf_metric_header(&self.column_options),
);
}
result
}
Expand All @@ -1460,8 +1475,11 @@ where
&self.write_perf_status,
);
let result = self.backend.delete_cf(self.handle(), &C::key(key));
if is_perf_enabled {
report_rocksdb_write_perf(C::rocksdb_delete_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
C::rocksdb_delete_perf_metric_header(&self.column_options),
);
}
result
}
Expand All @@ -1480,8 +1498,11 @@ where
&self.read_perf_status,
);
let result = self.backend.get_cf(self.handle(), &C::key(key));
if is_perf_enabled {
report_rocksdb_read_perf(C::rocksdb_get_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_read_perf(
&op_start_instant.elapsed(),
C::rocksdb_get_perf_metric_header(&self.column_options),
);
}

if let Some(serialized_value) = result? {
Expand All @@ -1501,8 +1522,11 @@ where
&self.read_perf_status,
);
let result = self.backend.get_cf(self.handle(), &C::key(key));
if is_perf_enabled {
report_rocksdb_read_perf(C::rocksdb_get_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_read_perf(
&op_start_instant.elapsed(),
C::rocksdb_get_perf_metric_header(&self.column_options),
);
}

if let Some(serialized_value) = result? {
Expand All @@ -1521,8 +1545,11 @@ where
&self.write_perf_status,
);
let result = self.backend.put_cf(self.handle(), &C::key(key), &buf);
if is_perf_enabled {
report_rocksdb_write_perf(C::rocksdb_put_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
C::rocksdb_put_perf_metric_header(&self.column_options),
);
}

result
Expand Down
44 changes: 16 additions & 28 deletions ledger/src/blockstore_metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use {
atomic::{AtomicU64, AtomicUsize, Ordering},
Arc,
},
time::Duration,
time::{Duration, Instant},
},
};

Expand Down Expand Up @@ -251,25 +251,27 @@ const PERF_SAMPLING_MIN_DURATION: Duration = Duration::from_secs(1);
pub(crate) fn maybe_enable_rocksdb_perf(
sample_interval: usize,
perf_status: &PerfSamplingStatus,
) -> bool {
) -> Option<Instant> {
if perf_status.should_sample(sample_interval) {
set_perf_stats(PerfStatsLevel::EnableTime);
PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context| {
perf_context.borrow_mut().reset();
});
return true;
return Some(Instant::now());
}
false
None
}

/// Reports the collected PerfContext and disables the PerfContext after
/// reporting.
pub(crate) fn report_rocksdb_read_perf(metric_header: &'static str) {
pub(crate) fn report_rocksdb_read_perf(total_op_duration: &Duration, metric_header: &'static str) {
PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
set_perf_stats(PerfStatsLevel::Disable);
let perf_context = perf_context_cell.borrow();
datapoint_info!(
metric_header,
// total nanos spent on the entire operation.
("total_op_nanos", total_op_duration.as_nanos() as i64, i64),
(
"user_key_comparison_count",
perf_context.metric(PerfMetric::UserKeyComparisonCount) as i64,
Expand Down Expand Up @@ -414,42 +416,28 @@ pub(crate) fn report_rocksdb_read_perf(metric_header: &'static str) {
perf_context.metric(PerfMetric::KeyLockWaitCount) as i64,
i64
),
// nanos spent on file/directory operations.
(
"env_file_exists_nanos",
perf_context.metric(PerfMetric::EnvFileExistsNanos) as i64,
i64
),
(
"env_get_children_nanos",
perf_context.metric(PerfMetric::EnvGetChildrenNanos) as i64,
i64
),
(
"env_lock_file_nanos",
perf_context.metric(PerfMetric::EnvLockFileNanos) as i64,
i64
),
(
"env_unlock_file_nanos",
perf_context.metric(PerfMetric::EnvUnlockFileNanos) as i64,
i64
),
(
"total_metric_count",
perf_context.metric(PerfMetric::TotalMetricCount) as i64,
"env_file_ops_nanos",
(perf_context.metric(PerfMetric::EnvFileExistsNanos)
+ perf_context.metric(PerfMetric::EnvGetChildrenNanos)
+ perf_context.metric(PerfMetric::EnvLockFileNanos)
+ perf_context.metric(PerfMetric::EnvUnlockFileNanos)) as i64,
i64
),
);
});
}
/// Reports the collected PerfContext and disables the PerfContext after
/// reporting.
pub(crate) fn report_rocksdb_write_perf(metric_header: &'static str) {
pub(crate) fn report_rocksdb_write_perf(total_op_duration: &Duration, metric_header: &'static str) {
PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
set_perf_stats(PerfStatsLevel::Disable);
let perf_context = perf_context_cell.borrow();
datapoint_info!(
metric_header,
// total nanos spent on the entire operation.
("total_op_nanos", total_op_duration.as_nanos() as i64, i64),
// total nanos spent on writing to WAL
(
"write_wal_nanos",
Expand Down

0 comments on commit d3dc2db

Please sign in to comment.