diff --git a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
index c8aba879c33dfbc7286292cdc1671e61cec9a23a..42ae3f98353e23d5d4dbc527ff3d17b7a4018220 100644
--- a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
+++ b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
@@ -30,6 +30,26 @@
               severity: 'warning',
             },
           },
+          {
+            // Fires when an instance's alertmanager_cluster_members value differs
+            // from the number of such series reported for the same service.
+            alert: 'AlertmanagerMembersInconsistent',
+            annotations: {
+              message: 'Alertmanager has not found all other members of the cluster.',
+            },
+            // group_left() makes the many-to-one match explicit: several
+            // instances share one `service` label, and without it evaluation
+            // fails as soon as more than one instance is inconsistent.
+            expr: |||
+              alertmanager_cluster_members{%(alertmanagerSelector)s}
+                != on (service) group_left()
+              count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
+            ||| % $._config,
+            'for': '5m',
+            labels: {
+              severity: 'critical',
+            },
+          },
         ],
       },
     ],
diff --git a/jsonnet/kube-prometheus/alerts/tests.yaml b/jsonnet/kube-prometheus/alerts/tests.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8cfc3aa7c648c6a273803a7f7ef52f4e1079d8a2
--- /dev/null
+++ b/jsonnet/kube-prometheus/alerts/tests.yaml
@@ -0,0 +1,61 @@
+# TODO(metalmatze): This file is temporarily saved here for later reference
+# until we find out how to integrate the tests into our jsonnet stack.
+
+rule_files:
+  - rules.yaml
+
+evaluation_interval: 1m
+
+tests:
+  - interval: 1m
+    input_series:
+      # The member count reported by alertmanager-main-0 drops 3 -> 2 -> 1 -> 0.
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
+        values: '3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0'
+      # The other two instances keep reporting 3, matching the series count.
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
+        values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
+        values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'
+    alert_rule_test:
+      # At 5m the mismatch has only just appeared; the 5m `for` has not elapsed.
+      - eval_time: 5m
+        alertname: AlertmanagerMembersInconsistent
+      # The rule uses group_left(), so the alert carries the labels of the
+      # inconsistent instance, not just `service`.
+      - eval_time: 11m
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+              job: 'alertmanager-main'
+              instance: '10.10.10.0'
+              namespace: 'monitoring'
+              pod: 'alertmanager-main-0'
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'
+      - eval_time: 17m
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+              job: 'alertmanager-main'
+              instance: '10.10.10.0'
+              namespace: 'monitoring'
+              pod: 'alertmanager-main-0'
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'
+      - eval_time: 23m
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+              job: 'alertmanager-main'
+              instance: '10.10.10.0'
+              namespace: 'monitoring'
+              pod: 'alertmanager-main-0'
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'