Skip to content

Commit

Permalink
[dev.net] Initial metrics for validator + rpc-server/gateway (MystenL…
Browse files Browse the repository at this point in the history
…abs#1455) (MystenLabs#1910)

Adds a Prometheus metrics endpoint on port 9184
  • Loading branch information
Evan Chan authored May 12, 2022
1 parent 9f81106 commit 7341474
Show file tree
Hide file tree
Showing 14 changed files with 464 additions and 18 deletions.
64 changes: 64 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions doc/src/contribute/observability.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ The `RUST_LOG` environment variable can be used to set both the overall logging
individual components, and even filtering down to specific spans or tags within spans are possible too.
For more details, please see the [EnvFilter](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html) docs.

## Metrics

Sui includes Prometheus-based metrics:
* `rpc_requests_by_route` and related for RPC Server API metrics and latencies (see `rpc-server.rs`)
* Gateway transaction metrics (see `GatewayMetrics` struct in `gateway-state.rs`)
* Validator transaction metrics (see `AuthorityMetrics` in `authority.rs`)

## Viewing logs, traces, metrics

The tracing architecture is based on the idea of [subscribers](https://github.com/tokio-rs/tracing#project-layout) which
Expand Down Expand Up @@ -149,6 +156,8 @@ In the graph above, there are multiple subscribers, JSON logs can be for example
The use of a log and metrics aggregator such as Vector allows for easy reconfiguration without interrupting the validator server,
as well as offloading observability traffic.

Metrics: served with a Prometheus scrape endpoint, by default at `<host>:9184/metrics`.

### Stdout (default)

By default, logs (but not spans) are formatted for human readability and output to stdout, with key-value tags at the end of every line.
Expand Down
2 changes: 2 additions & 0 deletions sui/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ serde-name = "0.2.0"
dirs = "4.0.0"
clap = { version = "3.1.17", features = ["derive"] }
telemetry-subscribers = { git = "https://github.com/MystenLabs/mysten-infra", rev = "3b7daedf91fd8937dde26e905b8114cac459b866" }
prometheus_exporter = "0.8.4"
multiaddr = "0.14.0"
mysten-network = { git = "https://github.com/MystenLabs/mysten-infra", rev = "3b7daedf91fd8937dde26e905b8114cac459b866" }

Expand Down Expand Up @@ -66,6 +67,7 @@ once_cell = "1.10.0"

jsonrpsee = { version = "0.12.0", features = ["full"] }
jsonrpsee-proc-macros = "0.12.0"
jsonrpsee-core = "0.12.0"
schemars = "0.8.8"
shell-words = "1.1.0"

Expand Down
71 changes: 68 additions & 3 deletions sui/src/bin/rpc-server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,15 @@ use jsonrpsee::{
http_server::{AccessControlBuilder, HttpServerBuilder},
RpcModule,
};
use jsonrpsee_core::middleware::Middleware;
use prometheus_exporter::prometheus::{
register_histogram_vec, register_int_counter_vec, HistogramVec, IntCounterVec,
};
use std::{
env,
net::{IpAddr, Ipv4Addr, SocketAddr},
path::PathBuf,
time::Instant,
};
use sui::{
api::{RpcGatewayOpenRpc, RpcGatewayServer},
Expand All @@ -20,6 +25,7 @@ use tracing::info;

const DEFAULT_RPC_SERVER_PORT: &str = "5001";
const DEFAULT_RPC_SERVER_ADDR_IPV4: &str = "127.0.0.1";
/// Socket address (`ip:port`) that the Prometheus exporter HTTP endpoint is bound to in `main`.
const PROM_PORT_ADDR: &str = "127.0.0.1:9184";

#[cfg(test)]
#[path = "../unit_tests/rpc_server_tests.rs"]
Expand Down Expand Up @@ -56,7 +62,7 @@ async fn main() -> anyhow::Result<()> {
let config_path = options
.config
.unwrap_or(sui_config_dir()?.join("gateway.conf"));
info!("Gateway config file path: {:?}", config_path);
info!(?config_path, "Gateway config file path");

let server_builder = HttpServerBuilder::default();
let mut ac_builder = AccessControlBuilder::default();
Expand All @@ -68,10 +74,11 @@ async fn main() -> anyhow::Result<()> {
}

let acl = ac_builder.build();
info!("{:?}", acl);
info!(?acl);

let server = server_builder
.set_access_control(acl)
.set_middleware(JsonRpcMetrics::new())
.build(SocketAddr::new(IpAddr::V4(options.host), options.port))
.await?;

Expand All @@ -87,8 +94,66 @@ async fn main() -> anyhow::Result<()> {

let addr = server.local_addr()?;
let server_handle = server.start(module)?;
info!("Sui RPC Gateway listening on local_addr:{}", addr);
info!(local_addr =? addr, "Sui RPC Gateway listening on local_addr");

let prom_binding = PROM_PORT_ADDR.parse().unwrap();
info!("Starting Prometheus HTTP endpoint at {}", PROM_PORT_ADDR);
prometheus_exporter::start(prom_binding).expect("Failed to start Prometheus exporter");

server_handle.await;
Ok(())
}

/// Prometheus metrics recorded for the JSON-RPC server, each labelled by `route`.
///
/// The values are updated by the `Middleware` implementation for this type.
#[derive(Clone)]
struct JsonRpcMetrics {
/// Counter of requests; `route` is a label (i.e. a separate time series per route).
requests_by_route: IntCounterVec,
/// Request latency in seconds; `route` is a label.
req_latency_by_route: HistogramVec,
/// Counter of failed requests; `route` is a label.
errors_by_route: IntCounterVec,
}

impl JsonRpcMetrics {
    /// Creates the per-route JSON-RPC metrics and registers them through the
    /// `prometheus` registration macros.
    ///
    /// Three metric families are created, each with a single `route` label:
    /// `rpc_requests_by_route` (counter), `req_latency_by_route` (histogram)
    /// and `errors_by_route` (counter).
    ///
    /// # Panics
    ///
    /// Panics if any of the metrics cannot be registered, for example when a
    /// metric with the same name has already been registered. Construct this
    /// type once per process.
    pub fn new() -> Self {
        Self {
            requests_by_route: register_int_counter_vec!(
                "rpc_requests_by_route",
                "Number of requests by route",
                &["route"]
            )
            .expect("failed to register the `rpc_requests_by_route` metric"),
            req_latency_by_route: register_histogram_vec!(
                "req_latency_by_route",
                "Latency of a request by route",
                &["route"]
            )
            .expect("failed to register the `req_latency_by_route` metric"),
            errors_by_route: register_int_counter_vec!(
                "errors_by_route",
                "Number of errors by route",
                &["route"]
            )
            .expect("failed to register the `errors_by_route` metric"),
        }
    }
}

impl Middleware for JsonRpcMetrics {
    /// Timestamp type handed from `on_request` to `on_result`.
    type Instant = Instant;

    /// Captures the moment a request starts being handled.
    fn on_request(&self) -> Instant {
        Instant::now()
    }

    /// Records the outcome of one call to the route `name`.
    ///
    /// Always bumps the request counter and observes the latency (in seconds,
    /// measured from `started_at`); additionally bumps the error counter when
    /// `success` is `false`.
    fn on_result(&self, name: &str, success: bool, started_at: Instant) {
        let route = [name];

        self.requests_by_route.with_label_values(&route).inc();

        let elapsed_secs = started_at.elapsed().as_secs_f64();
        self.req_latency_by_route
            .with_label_values(&route)
            .observe(elapsed_secs);

        if success {
            return;
        }
        self.errors_by_route.with_label_values(&route).inc();
    }
}
12 changes: 9 additions & 3 deletions sui/src/bin/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ use sui_types::{
};
use tracing::{error, info};

/// Socket address (`ip:port`) that the Prometheus exporter HTTP endpoint is bound to in `main`.
const PROM_PORT_ADDR: &str = "127.0.0.1:9184";

#[derive(Parser)]
#[clap(
name = "Sui Validator",
Expand Down Expand Up @@ -95,11 +97,15 @@ async fn main() -> Result<(), anyhow::Error> {
.join(CONSENSUS_DB_NAME)
.join(encode_bytes_hex(&authority.public_key));

info!(
"Initializing authority {:?} listening on {} (public addr: {})",
authority.public_key, listen_address, authority.network_address
info!(authority =? authority.public_key, public_addr =? authority.network_address,
"Initializing authority listening on {}", listen_address
);

// TODO: Switch from prometheus exporter. See https://github.com/MystenLabs/sui/issues/1907
let prom_binding = PROM_PORT_ADDR.parse().unwrap();
info!("Starting Prometheus HTTP endpoint at {}", PROM_PORT_ADDR);
prometheus_exporter::start(prom_binding).expect("Failed to start Prometheus exporter");

// Pass in the network parameters of all authorities
let net = network_config.get_authority_infos();
if let Err(e) = make_server(
Expand Down
2 changes: 2 additions & 0 deletions sui_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ fdlimit = "0.2.1"
schemars = "0.8.8"
multiaddr = "0.14.0"
mysten-network = { git = "https://github.com/MystenLabs/mysten-infra", rev = "3b7daedf91fd8937dde26e905b8114cac459b866" }
prometheus_exporter = "0.8.4"
once_cell = "1.10.0"

sui-adapter = { path = "../sui_programmability/adapter" }
sui-framework = { path = "../sui_programmability/framework" }
Expand Down
Loading

0 comments on commit 7341474

Please sign in to comment.