forked from dfinity/ic
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'gary/report_gen_status' into 'master'
feat: NODE-1037 - Report node generation status. Writes the generation (Gen1, Gen2, Unknown) to Prometheus. Sets up the structure for 'guestos_tool' - a Rust tool with siblings in the other OSes: setupos_tool, hostos_tool. These will eventually share logic in a common Rust library. See merge request dfinity-lab/public/ic!15118
- Loading branch information
Showing
10 changed files
with
401 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 11 additions & 0 deletions
11
ic-os/guestos/rootfs/etc/systemd/system/setup-node-gen-status.service
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# One-shot unit that runs `guestos_tool set-hardware-gen-metric` once per boot.
[Unit]
Description=Write node generation status
# Ordering only: start after node_exporter.service when both units are started.
After=node_exporter.service

[Service]
# The tool runs to completion once; RemainAfterExit keeps the unit reported as
# active after the process has exited.
Type=oneshot
RemainAfterExit=true
ExecStart=/opt/ic/bin/guestos_tool set-hardware-gen-metric

[Install]
WantedBy=multi-user.target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
load("@rules_rust//rust:defs.bzl", "rust_binary")

# Third-party crates linked into the guestos_tool binary.
DEPENDENCIES = [
    "@crate_index//:anyhow",
    "@crate_index//:clap_4_0_0",
    "@crate_index//:itertools",
    "@crate_index//:regex",
]

# Procedural-macro crates; currently none.
MACRO_DEPENDENCIES = []

# Crate aliases; currently none.
ALIASES = {}

# Linux-only binary built from every Rust source file under src/.
rust_binary(
    name = "guestos_tool",
    srcs = glob(["src/**/*.rs"]),
    aliases = ALIASES,
    crate_name = "guestos_tool",
    edition = "2021",
    proc_macro_deps = MACRO_DEPENDENCIES,
    target_compatible_with = ["@platforms//os:linux"],
    visibility = ["//visibility:public"],
    deps = DEPENDENCIES,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Cargo manifest for the guestos_tool binary.
[package]
name = "guestos_tool"
version = "1.0.0"
edition = "2021"

# Single binary target rooted at src/main.rs.
[[bin]]
name = "guestos_tool"
path = "src/main.rs"

[dependencies]
anyhow = { version = "^1" }
# The "derive" feature provides the Parser / Subcommand derive macros.
clap = { version = "^4", features = ["derive"] }
itertools = { version = "^0.10.0" }
regex = { version = "^1.3" }
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
use anyhow::Result; | ||
use clap::{Parser, Subcommand}; | ||
use std::path::Path; | ||
|
||
mod node_gen; | ||
use node_gen::get_node_gen_metric; | ||
|
||
mod prometheus_metric; | ||
use prometheus_metric::write_single_metric; | ||
|
||
#[derive(Subcommand)] | ||
pub enum Commands { | ||
SetHardwareGenMetric { | ||
#[arg( | ||
short = 'o', | ||
long = "output", | ||
default_value = "/run/node_exporter/collector_textfile/node_gen.prom" | ||
)] | ||
/// Filename to write the prometheus metric for node generation. | ||
/// Fails if directory doesn't exist. | ||
output_path: String, | ||
}, | ||
} | ||
|
||
#[derive(Parser)] | ||
#[command()] | ||
struct GuestOSArgs { | ||
#[command(subcommand)] | ||
command: Option<Commands>, | ||
} | ||
|
||
pub fn main() -> Result<()> { | ||
#[cfg(not(target_os = "linux"))] | ||
{ | ||
eprintln!("ERROR: this only runs on Linux."); | ||
std::process::exit(1); | ||
} | ||
let opts = GuestOSArgs::parse(); | ||
|
||
match opts.command { | ||
Some(Commands::SetHardwareGenMetric { output_path }) => { | ||
write_single_metric(&get_node_gen_metric(), Path::new(&output_path)) | ||
} | ||
None => Ok(()), | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
use anyhow::{anyhow, Context, Result}; | ||
use regex::Regex; | ||
use std::fmt; | ||
use std::fs; | ||
|
||
use crate::prometheus_metric::{LabelPair, MetricType, PrometheusMetric}; | ||
|
||
/// Hardware generation of a node.
///
/// `Unknown` covers every case where the generation could not be determined.
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub enum HardwareGen {
    Gen1,
    Gen2,
    Unknown,
}

impl fmt::Display for HardwareGen {
    /// Writes the generation label: `Gen1`, `Gen2` or `GenUnknown`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Static labels: no need to allocate a `String` just to print it.
        let label = match self {
            HardwareGen::Gen1 => "Gen1",
            HardwareGen::Gen2 => "Gen2",
            HardwareGen::Unknown => "GenUnknown",
        };
        f.write_str(label)
    }
}
|
||
/// Given the cpu model line from /proc/cpuinfo, parse and return node generation. | ||
fn parse_hardware_gen(cpu_model_line: &str) -> Result<HardwareGen> { | ||
let re = Regex::new(r"model name\s*:\s*AMD\s*EPYC\s+(\S+)\s+(\S+)\s+(\S+)")?; | ||
let captures = re | ||
.captures(cpu_model_line) | ||
.with_context(|| format!("Detected non-AMD CPU: {}", cpu_model_line))?; | ||
|
||
let epyc_model_number = captures | ||
.get(1) | ||
.with_context(|| format!("Could not parse AMD EPYC model number: {}", cpu_model_line))?; | ||
let epyc_model_number = epyc_model_number.as_str(); | ||
|
||
match epyc_model_number.chars().last() { | ||
Some('2') => Ok(HardwareGen::Gen1), | ||
Some('3') => Ok(HardwareGen::Gen2), | ||
Some(_) => { | ||
eprintln!( | ||
"CPU model other than EPYC Rome or Milan: {}", | ||
cpu_model_line | ||
); | ||
Ok(HardwareGen::Unknown) | ||
} | ||
None => Err(anyhow!( | ||
"Could not parse AMD EPYC model number: {}", | ||
epyc_model_number | ||
)), | ||
} | ||
} | ||
|
||
fn get_cpu_model_string() -> Result<String> { | ||
let cpu_info = fs::read_to_string("/proc/cpuinfo")?; | ||
cpu_info | ||
.lines() | ||
.find(|line| line.starts_with("model name")) | ||
.map(|line| line.to_string()) | ||
.ok_or(anyhow!("Error parsing cpu info: {}", cpu_info)) | ||
} | ||
|
||
fn get_node_gen() -> Result<HardwareGen> { | ||
let cpu_model_line = get_cpu_model_string()?; | ||
println!("Found CPU model: {cpu_model_line}"); | ||
parse_hardware_gen(&cpu_model_line) | ||
} | ||
|
||
/// Gather CPU info and return CPU metric | ||
/// Sample output: | ||
/// """ | ||
/// # HELP node_gen Generation of Node Hardware | ||
/// # TYPE node_gen gauge | ||
/// node_gen{gen="Gen1"} 0 | ||
/// """ | ||
pub fn get_node_gen_metric() -> PrometheusMetric { | ||
let gen = match get_node_gen() { | ||
Ok(gen) => gen, | ||
Err(e) => { | ||
eprintln!("Error getting node gen: {e}"); | ||
HardwareGen::Unknown | ||
} | ||
}; | ||
|
||
let gen_string = gen.to_string(); | ||
println!("Determined node generation: {gen_string}"); | ||
|
||
let metric_value = match gen { | ||
HardwareGen::Unknown => 0.0, | ||
_ => 1.0, | ||
}; | ||
|
||
PrometheusMetric { | ||
name: "node_gen".into(), | ||
help: "Generation of Node Hardware".into(), | ||
metric_type: MetricType::Gauge, | ||
labels: [LabelPair { | ||
label: "gen".into(), | ||
value: gen_string.clone(), | ||
}] | ||
.to_vec(), | ||
value: metric_value, | ||
} | ||
} | ||
|
||
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Checks recognised EPYC model lines and lines that must be rejected.
    #[test]
    fn test_parse_hardware_gen() {
        // Lines that parse, paired with the generation they map to.
        let recognised = [
            (
                "model name : AMD EPYC 7302 16-Core Processor",
                HardwareGen::Gen1,
            ),
            (
                "model name : AMD EPYC 7313 32-Core Processor",
                HardwareGen::Gen2,
            ),
            (
                "model name : AMD EPYC 7543 32-Core Processor",
                HardwareGen::Gen2,
            ),
        ];
        for (line, expected) in recognised {
            assert_eq!(parse_hardware_gen(line).unwrap(), expected);
        }

        // Lines that do not match the AMD EPYC pattern must yield an error.
        let rejected = [
            "model name : Intel Fake Lake i5-1040 32-Core Processor",
            "Fast times at Ridgemont High",
            "",
        ];
        for line in rejected {
            assert!(parse_hardware_gen(line).is_err());
        }
    }
}
Oops, something went wrong.