Skip to content

Commit

Permalink
Add kubernetes cluster status dashboard
Browse files Browse the repository at this point in the history
Signed-off-by: Xabier Larrakoetxea <[email protected]>
  • Loading branch information
slok committed May 15, 2019
1 parent a913037 commit 923c080
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 0 deletions.
6 changes: 6 additions & 0 deletions dashboard-examples/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,10 @@ This is a gitlab based dashboard example.

![](https://i.imgur.com/RGlygHF.png)

## Kubernetes status

This is a port of [this Grafana dashboard](https://grafana.com/dashboards/5315) (with very small changes). It mainly shows how to use gauges.

![](https://i.imgur.com/N5jtCFT.png)

[red]: https://www.weave.works/blog/the-red-method-key-metrics-for-microservices-architecture/
184 changes: 184 additions & 0 deletions dashboard-examples/kubernetes-cluster-status.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
{
"version": "v1",
"datasources": {
"prometheus": {
"prometheus": {
"address": "http://127.0.0.1:9090"
}
}
},
"dashboard": {
"variables": {
"interval": {
"interval": { "steps": 50 }
}
},
"widgets": [
{
"title": "Control plane UP",
"gridPos": { "w": 50 },
"singlestat": {
"query": {
"datasourceID": "prometheus",
"expr": "sum(up{job=~\"apiserver|kube-scheduler|kube-controller-manager\"} == 0) or vector(0)"
},
"valueText": "{{ if (gt .value 0.0) }}DOWN{{else}}UP{{end}}",
"thresholds": [
{ "color": "#299c46" },
{ "color": "#d44a3a", "startValue": 1 }
]
}
},
{
"title": "Alerts firing",
"gridPos": { "w": 50 },
"singlestat": {
"query": {
"datasourceID": "prometheus",
"expr": "sum(ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}) or vector(0)"
},
"unit": "none",
"thresholds": [
{ "color": "#299c46" },
{ "startValue": 3, "color": "#FF780A" },
{ "startValue": 5, "color": "#d44a3a" }
]
}
},
{
"title": "APIservers UP",
"gridPos": { "w": 25 },
"gauge": {
"percentValue": true,
"max": 100,
"query": {
"datasourceID": "prometheus",
"expr": "(sum(up{job=\"apiserver\"} == 1) / count(up{job=\"apiserver\"})) * 100"
},
"thresholds": [
{ "color": "#d44a3a" },
{ "startValue": 50, "color": "#FF780A" },
{ "startValue": 80, "color": "#299c46" }
]
}
},
{
"title": "Kubelets UP",
"gridPos": { "w": 25 },
"gauge": {
"percentValue": true,
"max": 100,
"query": {
"datasourceID": "prometheus",
"expr": "(sum(up{job=\"kubelet\"} == 1) / count(up{job=\"kubelet\"})) * 100"
},
"thresholds": [
{ "color": "#d44a3a" },
{ "startValue": 50, "color": "#FF780A" },
{ "startValue": 80, "color": "#299c46" }
]
}
},
{
"title": "Schedulers UP",
"gridPos": { "w": 25 },
"gauge": {
"percentValue": true,
"max": 100,
"query": {
"datasourceID": "prometheus",
"expr": "(sum(up{job=\"kube-scheduler\"} == 1) / count(up{job=\"kube-scheduler\"})) * 100"
},
"thresholds": [
{ "color": "#d44a3a" },
{ "startValue": 50, "color": "#FF780A" },
{ "startValue": 80, "color": "#299c46" }
]
}
},
{
"title": "Crashlooping control-plane pods",
"gridPos": { "w": 25 },
"singlestat": {
"query": {
"datasourceID": "prometheus",
"expr": "count(increase(kube_pod_container_status_restarts{namespace=~\"kube-system|tectonic-system\"}[1h]) > 5) or vector(0)"
},
"thresholds": [
{ "color": "#299c46" },
{ "startValue": 1, "color": "#FF780A" },
{ "startValue": 3, "color": "#d44a3a" }
]
}
},
{
"title": "CPU utilization",
"gridPos": { "w": 25 },
"gauge": {
"percentValue": true,
"max": 100,
"query": {
"datasourceID": "prometheus",
"expr": "avg(100 - (avg by (instance) (rate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))"
},
"thresholds": [
{ "color": "#299c46" },
{ "startValue": 80, "color": "#FF780A" },
{ "startValue": 90, "color": "#d44a3a" }
]
}
},
{
"title": "Memory utilization",
"gridPos": { "w": 25 },
"gauge": {
"percentValue": true,
"max": 100,
"query": {
"datasourceID": "prometheus",
"expr": "((sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes) - sum(node_memory_Buffers_bytes) - sum(node_memory_Cached_bytes)) / sum(node_memory_MemTotal_bytes)) * 100"
},
"thresholds": [
{ "color": "#299c46" },
{ "startValue": 80, "color": "#FF780A" },
{ "startValue": 90, "color": "#d44a3a" }
]
}
},
{
"title": "Filesystem utilization",
"gridPos": { "w": 25 },
"gauge": {
"percentValue": true,
"max": 100,
"query": {
"datasourceID": "prometheus",
"expr": "((sum(node_filesystem_size_bytes{device!=\"rootfs\"}) - sum(node_filesystem_free_bytes{device!=\"rootfs\"})) / sum(node_filesystem_size_bytes{device!=\"rootfs\"})) * 100"
},
"thresholds": [
{ "color": "#299c46" },
{ "startValue": 80, "color": "#FF780A" },
{ "startValue": 90, "color": "#d44a3a" }
]
}
},
{
"title": "Pod utilization",
"gridPos": { "w": 25 },
"gauge": {
"percentValue": true,
"max": 100,
"query": {
"datasourceID": "prometheus",
"expr": "100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100"
},
"thresholds": [
{ "color": "#299c46" },
{ "startValue": 80, "color": "#FF780A" },
{ "startValue": 90, "color": "#d44a3a" }
]
}
}
]
}
}

0 comments on commit 923c080

Please sign in to comment.