Skip to content

Commit

Permalink
Merge pull request kubevirt#8944 from machadovilaca/add-e2e-test-for-KubeVirtComponentExceedsRequested-CPU-and-Memory
Browse files Browse the repository at this point in the history

Add e2e tests for KubeVirtComponentExceedsRequestedCPU and KubeVirtComponentExceedsRequestedMemory alerts
  • Loading branch information
kubevirt-bot authored May 3, 2023
2 parents 08f1b5c + 8cdea60 commit 590a27b
Show file tree
Hide file tree
Showing 7 changed files with 523 additions and 377 deletions.
18 changes: 7 additions & 11 deletions hack/prom-rule-ci/prom-rules-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,33 @@ tests:
# Pod is using more CPU than expected
- interval: 1m
input_series:
- series: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="ci",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: '2+0x10'
- series: 'container_cpu_usage_seconds_total{namespace="ci",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: '1+1x6'
- series: 'kube_pod_container_resource_requests{namespace="ci",container="virt-controller",resource="cpu",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: '0+0x6'

alert_rule_test:
- eval_time: 5m
- eval_time: 6m
alertname: KubeVirtComponentExceedsRequestedCPU
exp_alerts:
- exp_annotations:
description: "Container virt-controller in pod virt-controller-8546c99968-x9jgg cpu usage exceeds the CPU requested"
summary: "The container is using more CPU than what is defined in the containers resource requests"
description: "Pod virt-controller-8546c99968-x9jgg cpu usage exceeds the CPU requested"
summary: "The containers in the pod are using more CPU than what is defined in the containers resource requests"
runbook_url: "https://kubevirt.io/monitoring/runbooks/KubeVirtComponentExceedsRequestedCPU"
exp_labels:
severity: "warning"
operator_health_impact: "none"
kubernetes_operator_part_of: "kubevirt"
kubernetes_operator_component: "kubevirt"
pod: "virt-controller-8546c99968-x9jgg"
container: "virt-controller"
namespace: ci
node: node1
resource: cpu

# Pod is using more memory than expected
- interval: 1m
input_series:
- series: 'container_memory_working_set_bytes{namespace="ci",container="",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: "157286400 157286400 157286400 157286400 157286400 157286400 157286400 157286400"
values: "157286400+0x5"
- series: 'kube_pod_container_resource_requests{namespace="ci",container="virt-controller",resource="memory",pod="virt-controller-8546c99968-x9jgg",node="node1"}'
values: "118325248 118325248 118325248 118325248 118325248 118325248 118325248 118325248"
values: "118325248+0x5"

alert_rule_test:
- eval_time: 5m
Expand Down
14 changes: 9 additions & 5 deletions pkg/virt-operator/resource/generate/components/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,11 @@ func NewPrometheusRuleSpec(ns string, workloadUpdatesEnabled bool) *v1.Prometheu
},
{
Alert: "KubeVirtComponentExceedsRequestedMemory",
Expr: intstr.FromString(fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="memory"}) - on(pod) group_left(node) container_memory_working_set_bytes{container="",namespace="%s"}) < 0`, ns, ns)),
For: "5m",
Expr: intstr.FromString(
// In 'container_memory_working_set_bytes', the 'container=""' selector picks the accumulated series for the pod slice, i.e. the total memory usage of all containers within the pod
fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="memory"}) - on(pod) group_left(node) container_memory_working_set_bytes{container="",namespace="%s"}) < 0`, ns, ns),
),
For: "5m",
Annotations: map[string]string{
"description": "Container {{ $labels.container }} in pod {{ $labels.pod }} memory usage exceeds the memory requested",
"summary": "The container is using more memory than what is defined in the containers resource requests",
Expand All @@ -493,12 +496,13 @@ func NewPrometheusRuleSpec(ns string, workloadUpdatesEnabled bool) *v1.Prometheu
{
Alert: "KubeVirtComponentExceedsRequestedCPU",
Expr: intstr.FromString(
fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="cpu"}) - on(pod) group_left(node) node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="%s"}) < 0`, ns, ns),
// In 'container_cpu_usage_seconds_total', the 'container=""' selector picks the accumulated series for the pod slice, i.e. the total CPU usage of all containers within the pod
fmt.Sprintf(`((kube_pod_container_resource_requests{namespace="%s",container=~"virt-controller|virt-api|virt-handler|virt-operator",resource="cpu"}) - on(pod) sum(rate(container_cpu_usage_seconds_total{container="",namespace="%s"}[5m])) by (pod)) < 0`, ns, ns),
),
For: "5m",
Annotations: map[string]string{
"description": "Container {{ $labels.container }} in pod {{ $labels.pod }} cpu usage exceeds the CPU requested",
"summary": "The container is using more CPU than what is defined in the containers resource requests",
"description": "Pod {{ $labels.pod }} cpu usage exceeds the CPU requested",
"summary": "The containers in the pod are using more CPU than what is defined in the containers resource requests",
"runbook_url": fmt.Sprintf(runbookURLTemplate, "KubeVirtComponentExceedsRequestedCPU"),
},
Labels: map[string]string{
Expand Down
5 changes: 5 additions & 0 deletions tests/monitoring/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"component_monitoring.go",
"monitoring.go",
"prometheus_utils.go",
"scaling_utils.go",
"vm_monitoring.go",
],
importpath = "kubevirt.io/kubevirt/tests/monitoring",
visibility = ["//visibility:public"],
deps = [
"//pkg/apimachinery/patch:go_default_library",
"//pkg/virtctl/pause:go_default_library",
"//staging/src/kubevirt.io/api/core/v1:go_default_library",
"//staging/src/kubevirt.io/client-go/kubecli:go_default_library",
Expand All @@ -30,12 +33,14 @@ go_library(
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/github.com/onsi/gomega/types:go_default_library",
"//vendor/github.com/prometheus/client_golang/api/prometheus/v1:go_default_library",
"//vendor/k8s.io/api/apps/v1:go_default_library",
"//vendor/k8s.io/api/autoscaling/v1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/rbac/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library",
],
Expand Down
Loading

0 comments on commit 590a27b

Please sign in to comment.