Skip to content
Snippets Groups Projects
Unverified commit b55c2825, authored by Frederic Branczyk, committed by GitHub
Browse files

Merge pull request #610 from lilic/add-more-alerts

Add PrometheusOperatorListErrors and fix PrometheusOperatorWatchErrors threshold 
parents 1917a572 d88cb263
No related branches found
No related tags found
No related merge requests found
...@@ -4,16 +4,29 @@ ...@@ -4,16 +4,29 @@
{ {
name: 'prometheus-operator', name: 'prometheus-operator',
rules: [ rules: [
{
alert: 'PrometheusOperatorListErrors',
expr: |||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{%(prometheusOperatorSelector)s}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{%(prometheusOperatorSelector)s}[1h]))) > 0.4
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
},
'for': '15m',
},
{ {
alert: 'PrometheusOperatorWatchErrors', alert: 'PrometheusOperatorWatchErrors',
expr: ||| expr: |||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[1h]))) > 0.1 (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[1h]))) > 0.4
||| % $._config, ||| % $._config,
labels: { labels: {
severity: 'warning', severity: 'warning',
}, },
annotations: { annotations: {
message: 'Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.', message: 'Errors while performing Watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
}, },
'for': '15m', 'for': '15m',
}, },
......
...@@ -1793,12 +1793,21 @@ spec: ...@@ -1793,12 +1793,21 @@ spec:
severity: warning severity: warning
- name: prometheus-operator - name: prometheus-operator
rules: rules:
- alert: PrometheusOperatorListErrors
annotations:
message: Errors while performing List operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[1h]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorWatchErrors - alert: PrometheusOperatorWatchErrors
annotations: annotations:
message: Errors while performing watch operations in controller {{$labels.controller}} message: Errors while performing Watch operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace. in {{$labels.namespace}} namespace.
expr: | expr: |
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[1h]))) > 0.1 (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[1h]))) > 0.4
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment