forked from grafana/loki
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalerts.libsonnet
74 lines (74 loc) · 2.31 KB
/
alerts.libsonnet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// Promtail alerting rules.
//
// This file evaluates to an object that appends one rule group,
// 'promtail_alerts', to the hidden `prometheusAlerts.groups` list, so it is
// meant to be merged (`+`) into a larger mixin object.

// All rules below share one shape: the condition must hold for 15 minutes
// before the alert fires, there is exactly one label (`severity`) and exactly
// one annotation (`message`). This helper builds that shape so each rule only
// states what is specific to it.
local promtailAlert(name, severity, expr, message) = {
  alert: name,
  expr: expr,
  'for': '15m',
  labels: { severity: severity },
  annotations: { message: message },
};

// Percentage of requests whose status_code matches `5..` or `failed`, taken
// over all requests (1m rate), per (namespace, job, route, instance).
// Fires above 10.
local requestsErrors = promtailAlert(
  name='PromtailRequestsErrors',
  severity='critical',
  expr=|||
    100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m])) by (namespace, job, route, instance)
    /
    sum(rate(promtail_request_duration_seconds_count[1m])) by (namespace, job, route, instance)
    > 10
  |||,
  message=|||
    {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
  |||,
);

// Compares a pre-computed 99th-percentile latency series against 1 (the
// message reports the value in seconds). The series is not defined in this
// file.
// NOTE(review): the message prints `$labels.route`, but the series name
// suggests it is aggregated by job and status_code only -- confirm that the
// recording rule actually keeps a `route` label, otherwise it renders empty.
local requestLatency = promtailAlert(
  name='PromtailRequestLatency',
  severity='critical',
  expr=|||
    job_status_code:promtail_request_duration_seconds:99quantile > 1
  |||,
  message=|||
    {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
  |||,
);

// Absolute gap between a file's size counter and the bytes-read counter is
// larger than 1e6 (reported as "1MB" in the message). Warning only.
local fileLagging = promtailAlert(
  name='PromtailFileLagging',
  severity='warning',
  expr=|||
    abs(promtail_file_bytes_total - promtail_read_bytes_total) > 1e6
  |||,
  message=|||
    {{ $labels.instance }} {{ $labels.job }} {{ $labels.path }} has been lagging by more than 1MB for more than 15m.
  |||,
);

// A (path, instance, job) combination that has a promtail_file_bytes_total
// series but no promtail_read_bytes_total series with the same labels.
local fileMissing = promtailAlert(
  name='PromtailFileMissing',
  severity='critical',
  expr=|||
    count by (path,instance,job) (promtail_file_bytes_total) unless count by (path,instance,job) (promtail_read_bytes_total)
  |||,
  message=|||
    {{ $labels.instance }} {{ $labels.job }} {{ $labels.path }} matches the glob but is not being tailed.
  |||,
);

{
  // `+::` keeps the field hidden and extends any alerts contributed by other
  // parts of the mixin; `groups+` appends instead of replacing.
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'promtail_alerts',
        rules: [
          requestsErrors,
          requestLatency,
          fileLagging,
          fileMissing,
        ],
      },
    ],
  },
}