Skip to content

Commit

Permalink
Merge pull request valeriansaliou#103 from Eijebong/stop-bullying-me
Browse files Browse the repository at this point in the history
Add a `enable_timer_backoff` config option to avoid spamming reminders
  • Loading branch information
valeriansaliou authored Feb 3, 2022
2 parents 1d0c637 + 47dd602 commit 3115a40
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 4 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ Use the sample [config.cfg](https://github.com/valeriansaliou/vigil/blob/master/

* `startup_notification` (type: _boolean_, allowed: `true`, `false`, default: `true`) — Whether to send startup notification or not (stating that systems are `healthy`)
* `reminder_interval` (type: _integer_, allowed: seconds, no default) — Interval at which downtime reminder notifications should be sent (if any)
* `enable_timer_backoff` (type: _boolean_, allowed: `true`, `false`, default: `false`) — If enabled, the downtime reminder interval will get larger as reminders are sent. The value will be `reminder_interval` * N with N being the number of reminders sent since the service went down.

**[notify.email]**

Expand Down
1 change: 1 addition & 0 deletions config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ queue_loaded_retry_delay = 500

startup_notification = true
reminder_interval = 300
enable_timer_backoff = false

[notify.email]

Expand Down
20 changes: 16 additions & 4 deletions src/aggregator/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ fn scan_and_bump_states() -> Option<BumpedStates> {
let mut should_notify = (store.states.status != Status::Dead && general_status == Status::Dead)
|| (store.states.status == Status::Dead && general_status != Status::Dead);

// Reset the backoff counter when we're back to being healthy
if store.states.status != Status::Healthy && general_status == Status::Healthy {
store.states.backoff_counter = 1;
}

// Check if should re-notify? (in case status did not change; only if dead)
// Notice: this is used to send periodic reminders of downtime (ie. 'still down' messages)
if has_changed == false && should_notify == false && general_status == Status::Dead {
Expand All @@ -240,14 +245,21 @@ fn scan_and_bump_states() -> Option<BumpedStates> {
SystemTime::now().duration_since(last_notified)
{
// Duration since last notified exceeds reminder interval, should re-notify
if duration_since_notified >= Duration::from_secs(reminder_interval) {
// The backoff counter is always applied: when backoff is disabled it stays at 1,
// so it leaves the reminder interval unchanged
if duration_since_notified
>= Duration::from_secs(reminder_interval * store.states.backoff_counter.pow(2))
{
info!("should re-notify about unchanged status");

should_notify = true
should_notify = true;
if notify.enable_timer_backoff {
store.states.backoff_counter += 1;
}
} else {
debug!(
"should not re-notify about unchanged status (interval: {})",
reminder_interval
"should not re-notify about unchanged status (interval: {}, backoff_counter: {})",
reminder_interval, store.states.backoff_counter
);
}
}
Expand Down
3 changes: 3 additions & 0 deletions src/config/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ pub struct ConfigNotify {
pub startup_notification: bool,

pub reminder_interval: Option<u64>,
#[serde(default = "defaults::enable_timer_backoff")]
pub enable_timer_backoff: bool,

pub email: Option<ConfigNotifyEmail>,
pub twilio: Option<ConfigNotifyTwilio>,
pub slack: Option<ConfigNotifySlack>,
Expand Down
4 changes: 4 additions & 0 deletions src/config/defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,7 @@ pub fn notify_slack_mention_channel() -> bool {
/// Returns `false`.
///
/// NOTE(review): presumably the serde default for a `reminders_only` option of the
/// generic notifier — confirm against the corresponding config struct.
pub fn notify_generic_reminders_only() -> bool {
false
}

/// Default for the `enable_timer_backoff` notify option: `false`, so the reminder
/// backoff is disabled unless the configuration turns it on.
///
/// Referenced by `ConfigNotify` through `#[serde(default = "defaults::enable_timer_backoff")]`.
pub fn enable_timer_backoff() -> bool {
false
}
1 change: 1 addition & 0 deletions src/prober/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ lazy_static! {
status: Status::Healthy,
date: None,
probes: IndexMap::new(),
backoff_counter: 1,
},
notified: None,
}));
Expand Down
1 change: 1 addition & 0 deletions src/prober/states.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub struct ServiceStates {
pub status: Status,
pub date: Option<String>,
pub probes: IndexMap<String, ServiceStatesProbe>,
pub backoff_counter: u64,
}

#[derive(Serialize)]
Expand Down

0 comments on commit 3115a40

Please sign in to comment.