Skip to content

Commit

Permalink
Merge pull request kubevirt#8038 from sradco/update_orphaned_vm_alert_rule
Browse files Browse the repository at this point in the history

Update orphaned vm alert
  • Loading branch information
kubevirt-bot authored Aug 3, 2022
2 parents 65d21a2 + d1c9ed7 commit 82a415c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 20 deletions.
47 changes: 28 additions & 19 deletions hack/prom-rule-ci/prom-rules-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,25 +152,28 @@ tests:
alertname: VirtOperatorDown
exp_alerts: [ ]


# vmi running on a node with an unready virt-handler pod
# Tests the alert when VMIs are running on a node with an unready virt-handler pod.
# The alert should not fire for a node with no running VMIs.
- interval: 1m
input_series:
- series: 'kube_pod_info{pod="virt-launcher-testvm-123", node="node01"}'
- series: 'kube_pod_info{pod="virt-handler-asdf", node="node01"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_status_ready{pod="virt-handler-asdf", condition="true"}'
values: '_ _ _ _ _ _ _ _ _ _ _'
- series: 'kube_pod_status_ready{pod="virt-handler-asdf", condition="false"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_info{pod="virt-handler-asdf", node="node01"}'
values: '0 0 0 0 0 0 0 0 0 0 0'
- series: 'kube_pod_info{pod="virt-launcher-testvm-123", node="node01"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_info{pod="virt-launcher-vmi", node="node02"}'
- series: 'kube_pod_info{pod="virt-handler-asdfg", node="node02"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_status_ready{pod="virt-handler-asdfg", condition="true"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_info{pod="virt-handler-asdfg", node="node02"}'
- series: 'kube_pod_info{pod="virt-launcher-vmi", node="node02"}'
values: '1 1 1 1 1 1 1 1 1 1 1'

- series: 'kube_pod_info{pod="virt-handler-abcd", node="node03"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_status_ready{pod="virt-handler-abcd", condition="true"}'
values: '0 0 0 0 0 0 0 0 0 0 0'
- series: 'kube_pod_info{pod="virt-launcher-novmi", node="node03"}'
values: '_ _ _ _ _ _ _ _ _ _ _'

alert_rule_test:
# no alert before 10 minutes
Expand All @@ -189,22 +192,29 @@ tests:
kubernetes_operator_part_of: "kubevirt"
kubernetes_operator_component: "kubevirt"


# vmi running on a node without a virt-handler pod
# Tests the alert when VMIs are running on a node without a virt-handler pod.
# The alert should not fire for a node with no running VMIs.
- interval: 1m
input_series:
- series: 'kube_pod_info{pod="virt-launcher-testvm-123", node="node01"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_status_ready{pod="virt-handler-asdf", condition="true"}'
values: '_ _ _ _ _ _ _ _ _ _ _'
- series: 'kube_pod_info{pod="virt-handler-asdf", node="node01"}'
values: '_ _ _ _ _ _ _ _ _ _ _'
- series: 'kube_pod_info{pod="virt-launcher-vmi", node="node02"}'
- series: 'kube_pod_status_ready{pod="virt-handler-asdf", condition="true"}'
values: '_ _ _ _ _ _ _ _ _ _ _'
- series: 'kube_pod_info{pod="virt-launcher-testvm-123", node="node01"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_info{pod="virt-handler-asdfg", node="node02"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_status_ready{pod="virt-handler-asdfg", condition="true"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_info{pod="virt-handler-asdfg", node="node02"}'
- series: 'kube_pod_info{pod="virt-launcher-vmi", node="node02"}'
values: '1 1 1 1 1 1 1 1 1 1 1'
- series: 'kube_pod_info{pod="virt-handler-abcd", node="node03"}'
values: '_ _ _ _ _ _ _ _ _ _ _'
- series: 'kube_pod_status_ready{pod="virt-handler-abcd", condition="true"}'
values: '_ _ _ _ _ _ _ _ _ _ _'
- series: 'kube_pod_info{pod="virt-launcher-novmi", node="node03"}'
values: '_ _ _ _ _ _ _ _ _ _ _'


alert_rule_test:
# no alert before 10 minutes
Expand All @@ -223,7 +233,6 @@ tests:
kubernetes_operator_part_of: "kubevirt"
kubernetes_operator_component: "kubevirt"


# Some virt controllers are not ready
- interval: 1m
input_series:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ func NewPrometheusRuleSpec(ns string, workloadUpdatesEnabled bool) *v1.Prometheu
},
{
Alert: "OrphanedVirtualMachineInstances",
Expr: intstr.FromString("((count by (node) (kube_pod_status_ready{condition='true',pod=~'virt-handler.*'} * on(pod) group_left(node) kube_pod_info{pod=~'virt-handler.*'})) or (count by (node)(kube_pod_info{pod=~'virt-launcher.*'})*0)) == 0"),
Expr: intstr.FromString("(((sum by (node) (kube_pod_status_ready{condition='true',pod=~'virt-handler.*'} * on(pod) group_left(node) sum by(pod,node)(kube_pod_info{pod=~'virt-handler.*',node!=''})) ) == 1) or (count by (node)( kube_pod_info{pod=~'virt-launcher.*',node!=''})*0)) == 0"),
For: "10m",
Annotations: map[string]string{
"summary": "No ready virt-handler pod detected on node {{ $labels.node }} with running vmis for more than 10 minutes",
Expand Down

0 comments on commit 82a415c

Please sign in to comment.