Skip to content

Commit

Permalink
Merge branch 'gary/report_gen_status' into 'master'
Browse files Browse the repository at this point in the history
feat: NODE-1037 - Report node generation status

Write the node hardware generation (Gen1, Gen2, or Unknown) to Prometheus.

Sets up the structure for 'guestos_tool', a Rust tool with siblings for the other OSes: setupos_tool and hostos_tool. These will eventually share logic in a common Rust library.

See merge request dfinity-lab/public/ic!15118
  • Loading branch information
garym-dfinity committed Oct 6, 2023
2 parents d628aa3 + 7879351 commit 82a4c9a
Show file tree
Hide file tree
Showing 10 changed files with 401 additions and 0 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ members = [
"rs/http_endpoints/metrics",
"rs/http_utils",
"rs/ic_os/deterministic_ips",
"rs/ic_os/guestos_tool",
"rs/ic_os/launch-single-vm",
"rs/ic_os/setupos-inject-configuration",
"rs/ic_os/setupos-disable-checks",
Expand Down
1 change: 1 addition & 0 deletions ic-os/guestos/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def image_deps(mode, malicious = False):

# additional files to install
"//publish/binaries:canister_sandbox": "/opt/ic/bin/canister_sandbox:0755",
"//publish/binaries:guestos_tool": "/opt/ic/bin/guestos_tool:0755",
"//publish/binaries:ic-btc-adapter": "/opt/ic/bin/ic-btc-adapter:0755",
"//publish/binaries:ic-consensus-pool-util": "/opt/ic/bin/ic-consensus-pool-util:0755",
"//publish/binaries:ic-https-outcalls-adapter": "/opt/ic/bin/ic-https-outcalls-adapter:0755",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# systemd unit that invokes `guestos_tool set-hardware-gen-metric` to record the node's
# hardware generation.
[Unit]
Description=Write node generation status
# Ordered after node_exporter.service.
# NOTE(review): presumably so the node_exporter textfile directory that the tool
# writes into by default already exists - confirm.
After=node_exporter.service

[Service]
# oneshot: the ExecStart command is expected to run to completion and exit.
Type=oneshot
ExecStart=/opt/ic/bin/guestos_tool set-hardware-gen-metric
# Keep the unit reported as active after the command has exited.
RemainAfterExit=true

[Install]
WantedBy=multi-user.target
2 changes: 2 additions & 0 deletions publish/binaries/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ LINUX_ONLY = [
"sevctl",
"vsock_guest",
"vsock_host",
"guestos_tool",
]

NO_STRIP = [
Expand All @@ -25,6 +26,7 @@ BINARIES = {
"certificate-syncer": "//rs/boundary_node/certificate_issuance/certificate_syncer:certificate-syncer",
"denylist-updater": "//rs/boundary_node/denylist_updater:denylist-updater",
"e2e-test-driver": "//rs/scenario_tests:e2e-test-driver",
"guestos_tool": "//rs/ic_os/guestos_tool:guestos_tool",
"ic-admin": "//rs/registry/admin:ic-admin",
"ic-backup": "//rs/backup:ic-backup",
"ic-balance-exporter": "//rs/boundary_node/ic_balance_exporter:ic-balance-exporter",
Expand Down
26 changes: 26 additions & 0 deletions rs/ic_os/guestos_tool/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
load("@rules_rust//rust:defs.bzl", "rust_binary")

# Third-party crates linked into the guestos_tool binary.
DEPENDENCIES = [
"@crate_index//:anyhow",
"@crate_index//:clap_4_0_0",
"@crate_index//:itertools",
"@crate_index//:regex",
]

# Proc-macro crates required by the binary (currently none).
MACRO_DEPENDENCIES = []

# Crate aliases passed to the Rust rule (currently none).
ALIASES = {}

# The guestos_tool executable: built from every .rs file under src/, restricted to
# Linux targets, and publicly visible so other packages can reference it.
rust_binary(
name = "guestos_tool",
srcs = glob(["src/**/*.rs"]),
aliases = ALIASES,
crate_name = "guestos_tool",
edition = "2021",
proc_macro_deps = MACRO_DEPENDENCIES,
target_compatible_with = [
"@platforms//os:linux",
],
visibility = ["//visibility:public"],
deps = DEPENDENCIES,
)
15 changes: 15 additions & 0 deletions rs/ic_os/guestos_tool/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Cargo manifest for the guestos_tool binary crate.
[package]
name = "guestos_tool"
version = "1.0.0"
edition = "2021"

# Single binary target whose entry point is src/main.rs.
[[bin]]
name = "guestos_tool"
path = "src/main.rs"

# Keep this list in sync with DEPENDENCIES in the crate's BUILD.bazel.
[dependencies]
anyhow = {version = "^1"}
clap = {version = "^4", features = ["derive"]}
itertools = {version = "^0.10.0"}
regex = {version = "^1.3"}

46 changes: 46 additions & 0 deletions rs/ic_os/guestos_tool/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use anyhow::Result;
use clap::{Parser, Subcommand};
use std::path::Path;

mod node_gen;
use node_gen::get_node_gen_metric;

mod prometheus_metric;
use prometheus_metric::write_single_metric;

// Subcommands accepted by the `guestos_tool` binary, parsed through clap's derive API.
// The comments added here are plain `//` comments on purpose: clap turns `///` doc
// comments on these items into user-visible help text.
#[derive(Subcommand)]
pub enum Commands {
// Writes the node hardware generation as a Prometheus metric to `output_path`.
SetHardwareGenMetric {
#[arg(
short = 'o',
long = "output",
default_value = "/run/node_exporter/collector_textfile/node_gen.prom"
)]
/// Filename to write the prometheus metric for node generation.
/// Fails if directory doesn't exist.
output_path: String,
},
}

// Top-level command-line arguments of `guestos_tool`.
#[derive(Parser)]
#[command()]
struct GuestOSArgs {
// Subcommand to execute; `None` when the tool is invoked without one.
#[command(subcommand)]
command: Option<Commands>,
}

/// Entry point of `guestos_tool`.
///
/// Parses the command line and runs the requested subcommand. Invoking the tool
/// without a subcommand is a successful no-op. On non-Linux targets the process
/// prints an error and exits with status 1 before any argument parsing happens.
pub fn main() -> Result<()> {
    #[cfg(not(target_os = "linux"))]
    {
        eprintln!("ERROR: this only runs on Linux.");
        std::process::exit(1);
    }

    match GuestOSArgs::parse().command {
        // Nothing was requested, so there is nothing to do.
        None => Ok(()),
        Some(Commands::SetHardwareGenMetric { output_path }) => {
            let metric = get_node_gen_metric();
            let destination = Path::new(&output_path);
            write_single_metric(&metric, destination)
        }
    }
}
131 changes: 131 additions & 0 deletions rs/ic_os/guestos_tool/src/node_gen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
use anyhow::{anyhow, Context, Result};
use regex::Regex;
use std::fmt;
use std::fs;

use crate::prometheus_metric::{LabelPair, MetricType, PrometheusMetric};

/// Hardware generation of a node, as classified by `parse_hardware_gen` from the
/// CPU model line.
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub enum HardwareGen {
    /// AMD EPYC model whose number marks generation `2`
    /// (presumably EPYC Rome, going by the diagnostics in this module).
    Gen1,
    /// AMD EPYC model whose number marks generation `3`
    /// (presumably EPYC Milan, going by the diagnostics in this module).
    Gen2,
    /// Any other CPU, or the generation could not be determined.
    Unknown,
}

impl fmt::Display for HardwareGen {
    /// Writes the label used for this generation: `Gen1`, `Gen2` or `GenUnknown`.
    ///
    /// The label is a static string (no allocation) and is emitted through
    /// `Formatter::pad`, so width, alignment and precision flags are honoured.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            HardwareGen::Gen1 => "Gen1",
            HardwareGen::Gen2 => "Gen2",
            HardwareGen::Unknown => "GenUnknown",
        };
        f.pad(label)
    }
}

/// Given the cpu model line from /proc/cpuinfo, parse and return node generation.
///
/// The line must match `model name : AMD EPYC <model> <word> <word>`. The generation
/// is taken from the last ASCII digit of `<model>` rather than its last character,
/// so parts carrying a letter suffix (e.g. `7302P`, `7773X`) are classified by their
/// generation digit instead of falling through to `Unknown`:
///
/// * `2` => [`HardwareGen::Gen1`]
/// * `3` => [`HardwareGen::Gen2`]
/// * anything else (including a model token without digits) => [`HardwareGen::Unknown`],
///   with a diagnostic on stderr
///
/// # Errors
///
/// Returns an error if the line does not match the AMD EPYC pattern above.
fn parse_hardware_gen(cpu_model_line: &str) -> Result<HardwareGen> {
    let re = Regex::new(r"model name\s*:\s*AMD\s*EPYC\s+(\S+)\s+(\S+)\s+(\S+)")?;
    let captures = re
        .captures(cpu_model_line)
        .with_context(|| format!("Detected non-AMD CPU: {}", cpu_model_line))?;

    let epyc_model_number = captures
        .get(1)
        .with_context(|| format!("Could not parse AMD EPYC model number: {}", cpu_model_line))?;
    let epyc_model_number = epyc_model_number.as_str();

    // Scan from the right so trailing letter suffixes are skipped.
    let generation_digit = epyc_model_number
        .chars()
        .rev()
        .find(char::is_ascii_digit);

    match generation_digit {
        Some('2') => Ok(HardwareGen::Gen1),
        Some('3') => Ok(HardwareGen::Gen2),
        // Another digit, or no digit at all: not a model this tool recognises.
        _ => {
            eprintln!(
                "CPU model other than EPYC Rome or Milan: {}",
                cpu_model_line
            );
            Ok(HardwareGen::Unknown)
        }
    }
}

fn get_cpu_model_string() -> Result<String> {
let cpu_info = fs::read_to_string("/proc/cpuinfo")?;
cpu_info
.lines()
.find(|line| line.starts_with("model name"))
.map(|line| line.to_string())
.ok_or(anyhow!("Error parsing cpu info: {}", cpu_info))
}

/// Determines the hardware generation of the machine this runs on.
///
/// Fetches the CPU model line, echoes it to stdout, and hands it to
/// `parse_hardware_gen`; failures from either step are propagated to the caller.
fn get_node_gen() -> Result<HardwareGen> {
    get_cpu_model_string().and_then(|cpu_model_line| {
        println!("Found CPU model: {cpu_model_line}");
        parse_hardware_gen(&cpu_model_line)
    })
}

/// Gather CPU info and return the node generation metric.
///
/// The generation is carried in the `gen` label. The gauge value is `1` when a known
/// generation (Gen1 or Gen2) was detected and `0` when it is unknown. Detection
/// errors are logged to stderr and reported as `GenUnknown` rather than propagated.
///
/// Sample output for a Gen1 node (exact rendering is up to the `prometheus_metric`
/// writer):
/// """
/// # HELP node_gen Generation of Node Hardware
/// # TYPE node_gen gauge
/// node_gen{gen="Gen1"} 1
/// """
pub fn get_node_gen_metric() -> PrometheusMetric {
    let hardware_gen = get_node_gen().unwrap_or_else(|e| {
        eprintln!("Error getting node gen: {e}");
        HardwareGen::Unknown
    });

    let gen_string = hardware_gen.to_string();
    println!("Determined node generation: {gen_string}");

    // Exhaustive on purpose: adding a variant forces a decision about its value.
    let metric_value = match hardware_gen {
        HardwareGen::Unknown => 0.0,
        HardwareGen::Gen1 | HardwareGen::Gen2 => 1.0,
    };

    PrometheusMetric {
        name: "node_gen".into(),
        help: "Generation of Node Hardware".into(),
        metric_type: MetricType::Gauge,
        labels: vec![LabelPair {
            label: "gen".into(),
            value: gen_string,
        }],
        value: metric_value,
    }
}

#[cfg(test)]
pub mod tests {
    use super::*;

    /// Exercises `parse_hardware_gen` with recognised AMD EPYC model lines and
    /// with inputs that must be rejected.
    #[test]
    fn test_parse_hardware_gen() {
        // Model lines that must parse, paired with the generation they map to.
        let recognized = [
            (
                "model name : AMD EPYC 7302 16-Core Processor",
                HardwareGen::Gen1,
            ),
            (
                "model name : AMD EPYC 7313 32-Core Processor",
                HardwareGen::Gen2,
            ),
            (
                "model name : AMD EPYC 7543 32-Core Processor",
                HardwareGen::Gen2,
            ),
        ];
        for (cpu_model_line, expected) in recognized {
            let parsed = parse_hardware_gen(cpu_model_line).unwrap();
            assert_eq!(parsed, expected);
        }

        // Inputs that do not describe an AMD EPYC CPU must produce an error.
        let rejected = [
            "model name : Intel Fake Lake i5-1040 32-Core Processor",
            "Fast times at Ridgemont High",
            "",
        ];
        for cpu_model_line in rejected {
            assert!(parse_hardware_gen(cpu_model_line).is_err());
        }
    }
}
Loading

0 comments on commit 82a4c9a

Please sign in to comment.