Skip to content

Commit

Permalink
[Cluster test] Simple cluster test that reboots one validator
Browse files Browse the repository at this point in the history
This diff introduces a simple 'skeleton' for cluster tests and has one simple experiment that reboots a validator.
It does not monitor metrics yet; that is going to be the next part.

It also does certain things in a simplified way; for example, instead of using an SSH client library it just invokes the ssh CLI, which is obviously suboptimal.

Right now it just runs an experiment that reboots a random validator and waits for it to come back.
  • Loading branch information
Andrey Chursin authored and calibra-opensource committed Jul 22, 2019
1 parent e209584 commit 4cb3760
Show file tree
Hide file tree
Showing 11 changed files with 208 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,6 @@ Temporary Items

# Generated VM config in vm_genesis
language/vm/vm_genesis/genesis/vm_config.toml

# local cargo config
.cargo/config
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ members = [
"storage/storage_proto",
"storage/storage_service",
"testsuite",
"testsuite/cluster_test",
"testsuite/libra_fuzzer",
"types",
"vm_validator",
Expand Down
11 changes: 11 additions & 0 deletions testsuite/cluster_test/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "cluster_test"
version = "0.1.0"
authors = ["Libra Association <[email protected]>"]
license = "Apache-2.0"
publish = false
edition = "2018"

[dependencies]
failure = { path = "../../common/failure_ext", package = "failure_ext" }
rand = "0.6.5"
29 changes: 29 additions & 0 deletions testsuite/cluster_test/src/cluster.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use crate::instance::Instance;
use failure::{self, prelude::*};
use rand::prelude::*;
use std::{
fs::File,
io::{BufRead, BufReader},
};

/// The group of instances that cluster-test experiments operate on.
pub struct Cluster {
// Populated by `discover`, which refuses to build a `Cluster` from an empty list.
instances: Vec<Instance>, // guaranteed non-empty
}

impl Cluster {
    /// Builds a cluster from the `instances.txt` file in the current working
    /// directory, treating every non-blank line as one instance address.
    ///
    /// Surrounding whitespace (including a stray `\r` from CRLF line endings)
    /// is trimmed and blank lines are skipped, so a trailing newline or an
    /// empty separator line never produces an instance with an empty address.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened or read, or if it contains no
    /// instance addresses at all.
    pub fn discover() -> failure::Result<Self> {
        let reader = BufReader::new(File::open("instances.txt")?);
        let mut instances = vec![];
        for line in reader.lines() {
            let line = line?;
            let address = line.trim();
            // An empty address would only fail later, when it is used as an SSH target.
            if !address.is_empty() {
                instances.push(Instance::new(address));
            }
        }
        ensure!(!instances.is_empty(), "instances.txt is empty");
        Ok(Self { instances })
    }

    /// Returns a clone of one instance picked uniformly at random.
    pub fn random_instance(&self) -> Instance {
        let mut rnd = rand::thread_rng();
        self.instances
            .choose(&mut rnd)
            .expect("Cluster::discover guarantees at least one instance")
            .clone()
    }
}
9 changes: 9 additions & 0 deletions testsuite/cluster_test/src/effects/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
mod reboot;

use failure;
pub use reboot::Reboot;

/// An action that can be started and then polled until it has finished.
pub trait Effect {
/// Starts the effect; an `Err` means it could not be triggered.
fn apply(&self) -> failure::Result<()>;
/// Reports whether the effect has finished; callers are expected to poll this.
fn is_complete(&self) -> bool;
}
48 changes: 48 additions & 0 deletions testsuite/cluster_test/src/effects/reboot.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use crate::{effects::Effect, instance::Instance};
use failure;

/// Effect that reboots a single instance.
pub struct Reboot {
// The instance that will be rebooted.
instance: Instance,
}

impl Reboot {
pub fn new(instance: Instance) -> Self {
Self { instance }
}
}

impl Effect for Reboot {
    /// Drops a marker file in `/dev/shm` on the instance and then launches
    /// `sudo reboot` in the background over SSH.
    fn apply(&self) -> failure::Result<()> {
        println!("Rebooting {}", self.instance);
        let reboot_cmd = "touch /dev/shm/cluster_test_reboot; nohup sudo /usr/sbin/reboot &";
        self.instance.run_cmd(vec![reboot_cmd])
    }

    /// Returns `true` once the instance answers on the port probed by
    /// `check_ac_port` and the marker file written by `apply` is gone.
    fn is_complete(&self) -> bool {
        // Guard: nothing more to check while the port is unreachable.
        if !self.instance.check_ac_port() {
            println!(
                "Rebooting {} in progress - waiting for connection",
                self.instance
            );
            return false;
        }

        // The leading `!` inverts `cat`'s exit status, so the remote command
        // succeeds only when the marker file no longer exists (presumably
        // because `/dev/shm` is wiped by the reboot - TODO confirm).
        let marker_gone = self
            .instance
            .run_cmd(vec!["! cat /dev/shm/cluster_test_reboot"])
            .is_ok();

        if marker_gone {
            println!("Rebooting {} complete", self.instance);
        } else {
            println!(
                "Rebooting {} in progress - did not reboot yet",
                self.instance
            );
        }
        marker_gone
    }
}
7 changes: 7 additions & 0 deletions testsuite/cluster_test/src/experiments/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
mod reboot_random_validator;

pub use reboot_random_validator::RebootRandomValidator;

/// A self-contained scenario that is executed against the cluster.
pub trait Experiment {
/// Executes the experiment, returning `Err` if it could not be carried out.
fn run(&self) -> failure::Result<()>;
}
31 changes: 31 additions & 0 deletions testsuite/cluster_test/src/experiments/reboot_random_validator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use crate::{
cluster::Cluster,
effects::{Effect, Reboot},
experiments::Experiment,
instance::Instance,
};
use failure;
use std::{thread, time::Duration};

/// Experiment that reboots one instance and waits for the reboot to complete.
pub struct RebootRandomValidator {
// The target, chosen at random from the cluster when the experiment is constructed.
instance: Instance,
}

impl RebootRandomValidator {
    /// Picks a random instance from `cluster` as the target of the reboot.
    pub fn new(cluster: &Cluster) -> Self {
        let instance = cluster.random_instance();
        Self { instance }
    }
}

impl Experiment for RebootRandomValidator {
    /// Triggers the reboot and then polls every five seconds until it reports
    /// completion.
    ///
    /// Fails only if the reboot itself cannot be triggered; the wait for
    /// completion has no upper bound.
    fn run(&self) -> failure::Result<()> {
        let poll_interval = Duration::from_secs(5);
        let effect = Reboot::new(self.instance.clone());
        effect.apply()?;
        loop {
            if effect.is_complete() {
                return Ok(());
            }
            thread::sleep(poll_interval);
        }
    }
}
54 changes: 54 additions & 0 deletions testsuite/cluster_test/src/instance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
use failure::{self, prelude::*};
use std::{
ffi::OsStr,
fmt,
process::{Command, Stdio},
};

/// Handle to a single remote machine in the cluster.
#[derive(Clone)]
pub struct Instance {
// Address of the machine; used as the SSH destination host and as the `nc` probe target.
ip: String,
}

impl Instance {
    /// Creates a handle for the machine reachable at `ip`.
    pub fn new<I: Into<String>>(ip: I) -> Instance {
        let ip = ip.into();
        Instance { ip }
    }

    /// Runs `args` on the instance by invoking the `ssh` CLI as `ec2-user`
    /// with the `/libra_rsa` identity file. The remote stderr is discarded.
    ///
    /// # Errors
    ///
    /// Fails if `ssh` cannot be spawned or if it exits with a non-zero status;
    /// the reported code is `-1` when no exit code is available.
    pub fn run_cmd<I, S>(&self, args: I) -> failure::Result<()>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<OsStr>,
    {
        let destination = format!("ec2-user@{}", self.ip);
        let exit = Command::new("ssh")
            .arg("-i")
            .arg("/libra_rsa")
            .arg(&destination)
            .args(args)
            .stderr(Stdio::null())
            .status()?;
        ensure!(
            exit.success(),
            "Failed with code {}",
            exit.code().unwrap_or(-1)
        );
        Ok(())
    }

    /// Probes TCP port 30307 on the instance using `nc -z` with a one second
    /// timeout. Returns `false` if the probe fails or `nc` cannot be run.
    pub fn check_ac_port(&self) -> bool {
        Command::new("nc")
            .args(&["-w", "1", "-z", self.ip.as_str(), "30307"])
            .status()
            .map(|exit| exit.success())
            .unwrap_or(false)
    }
}

impl fmt::Display for Instance {
    /// Displays the instance as its bare address.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.ip.as_str())
    }
}
4 changes: 4 additions & 0 deletions testsuite/cluster_test/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod cluster;
pub mod effects;
pub mod experiments;
pub mod instance;
11 changes: 11 additions & 0 deletions testsuite/cluster_test/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
use cluster_test::{
cluster::Cluster,
experiments::{Experiment, RebootRandomValidator},
};

/// Discovers the cluster, runs the reboot experiment once and prints `OK`.
///
/// Panics with a descriptive message if discovery or the experiment fails.
pub fn main() {
    let cluster = Cluster::discover().expect("Failed to discover cluster");
    RebootRandomValidator::new(&cluster)
        .run()
        .expect("Failed to run experiment");
    println!("OK");
}

0 comments on commit 4cb3760

Please sign in to comment.