Skip to content

Commit

Permalink
Merge pull request AleoNet#1090 from niklaslong/average-conn-time
Browse files (browse the repository at this point in the history)
Introduce the average connection duration metric
  • Loading branch information
ljedrz authored Aug 27, 2021
2 parents 7b10dba + fdda56d commit 3f75504
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 7 deletions.
84 changes: 82 additions & 2 deletions metrics/grafana/dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
},
"gridPos": {
"h": 10,
"w": 12,
"w": 6,
"x": 0,
"y": 0
},
Expand Down Expand Up @@ -139,6 +139,86 @@
"title": "states of current connections",
"type": "timeseries"
},
{
"datasource": null,
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"graph": false,
"legend": false,
"tooltip": false
},
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "always",
"spanNulls": true
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 6,
"x": 6,
"y": 0
},
"id": 24,
"interval": null,
"options": {
"graph": {},
"legend": {
"calcs": [
"mean"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltipOptions": {
"mode": "single"
}
},
"pluginVersion": "7.5.6",
"targets": [
{
"exemplar": true,
"expr": "rate(snarkos_connections_average_duration_sum[$__rate_interval])/rate(snarkos_connections_average_duration_count[$__rate_interval])",
"interval": "",
"legendFormat": "values (rolling average on short interval)",
"refId": "A"
}
],
"title": "connection duration",
"type": "timeseries"
},
{
"datasource": null,
"fieldConfig": {
Expand Down Expand Up @@ -1191,5 +1271,5 @@
"timezone": "",
"title": "snarkOS node",
"uid": "PAzNcaCGz",
"version": 16
"version": 18
}
1 change: 1 addition & 0 deletions metrics/src/names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub mod connections {
pub const CONNECTING: &str = "snarkos_connections_connecting_total";
pub const CONNECTED: &str = "snarkos_connections_connected_total";
pub const DISCONNECTED: &str = "snarkos_connections_disconnected_total";
pub const DURATION: &str = "snarkos_connections_average_duration";
}

pub mod handshakes {
Expand Down
4 changes: 3 additions & 1 deletion metrics/src/snapshots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,16 @@ pub struct NodeOutboundStats {
pub all_failures: u64,
}

#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct NodeConnectionStats {
/// The number of all connections the node has accepted.
pub all_accepted: u64,
/// The number of all connections the node has initiated.
pub all_initiated: u64,
/// The number of rejected inbound connection requests.
pub all_rejected: u64,
/// The average connection duration (in seconds).
pub average_duration: f64,
/// Number of currently connecting peers.
pub connecting_peers: u32,
/// Number of currently connected peers.
Expand Down
5 changes: 5 additions & 0 deletions metrics/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ pub struct ConnectionStats {
connected_peers: DiscreteGauge,
/// Number of known disconnected peers.
disconnected_peers: DiscreteGauge,
/// Tracks connection durations (once closed).
duration: CircularHistogram,
}

impl ConnectionStats {
Expand All @@ -198,6 +200,7 @@ impl ConnectionStats {
connecting_peers: DiscreteGauge::new(),
connected_peers: DiscreteGauge::new(),
disconnected_peers: DiscreteGauge::new(),
duration: CircularHistogram::new(),
}
}

Expand All @@ -206,6 +209,7 @@ impl ConnectionStats {
all_accepted: self.all_accepted.read(),
all_initiated: self.all_initiated.read(),
all_rejected: self.all_rejected.read(),
average_duration: self.duration.average(),
connecting_peers: self.connecting_peers.read() as u32,
connected_peers: self.connected_peers.read() as u32,
disconnected_peers: self.disconnected_peers.read() as u32,
Expand Down Expand Up @@ -372,6 +376,7 @@ impl Recorder for Stats {

fn record_histogram(&self, key: &Key, value: f64) {
let metric = match key.name() {
connections::DURATION => &self.connections.duration,
misc::BLOCK_PROCESSING_TIME => &self.misc.block_processing_time,
internal_rtt::GETPEERS => &self.internal_rtt.getpeers,
internal_rtt::GETSYNC => &self.internal_rtt.getsync,
Expand Down
12 changes: 11 additions & 1 deletion network/src/peers/peer/peer_quality.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
// You should have received a copy of the GNU General Public License
// along with the snarkOS library. If not, see <https://www.gnu.org/licenses/>.

use snarkos_metrics as metrics;

use std::time::Instant;

use chrono::{DateTime, Utc};
Expand Down Expand Up @@ -69,11 +71,19 @@ impl PeerQuality {
}

/// Marks the peer as disconnected and records how long the connection lasted.
///
/// Stamps `last_disconnected` with the current UTC time, increments
/// `disconnected_count`, and resets the pong-expectation flag and the
/// sync-block counters. If a `last_connected` timestamp is present, the time
/// elapsed since then is submitted to the `connections::DURATION` histogram.
pub fn disconnected(&mut self) {
    // Read the clock exactly once so the stored timestamp and the recorded
    // duration are derived from the same instant.
    let disconnect_timestamp = chrono::Utc::now();

    // NOTE(review): `see` is defined outside this view; presumably it
    // refreshes the peer's last-seen time — confirm.
    self.see();
    self.last_disconnected = Some(disconnect_timestamp);
    self.disconnected_count += 1;
    self.expecting_pong = false;
    self.remaining_sync_blocks = 0;
    self.total_sync_blocks = 0;

    if let Some(last_connected) = self.last_connected {
        // `to_std` returns an error for a negative duration (for example when
        // the wall clock was stepped backwards); such samples are skipped
        // rather than recorded.
        if let Ok(elapsed) = disconnect_timestamp.signed_duration_since(last_connected).to_std() {
            metrics::histogram!(metrics::connections::DURATION, elapsed);
        }
    }
}
}
7 changes: 4 additions & 3 deletions rpc/documentation/public_endpoints/getnodestats.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ None
| `connections.all_accepted` | u64 | The number of connection requests the node has received |
| `connections.all_initiated` | u64 | The number of connection requests the node has made |
| `connections.all_rejected` | u64 | The number of connection requests the node has rejected |
| `connections.connected_peers` | u16 | The number of currently connected peers |
| `connections.connecting_peers` | u16 | The number of currently connecting peers |
| `connections.disconnected_peers` | u16 | The number of known disconnected peers |
| `connections.average_duration` | f64 | The average connection duration in seconds |
| `connections.connected_peers` | u32 | The number of currently connected peers |
| `connections.connecting_peers` | u32 | The number of currently connecting peers |
| `connections.disconnected_peers` | u32 | The number of known disconnected peers |
| `handshakes.failures_init` | u64 | The number of failed handshakes as the initiator |
| `handshakes.failures_resp` | u64 | The number of failed handshakes as the responder |
| `handshakes.successes_init` | u64 | The number of successful handshakes as the initiator |
Expand Down

0 comments on commit 3f75504

Please sign in to comment.