From 49835437f784c28a6c34a5f69bc2b79907d9ff80 Mon Sep 17 00:00:00 2001
From: Matthias Loibl <mail@matthiasloibl.com>
Date: Fri, 26 Oct 2018 16:39:39 +0200
Subject: [PATCH] Add AlertmanagerMembersInconsistent alerting rule

---
 .../alerts/alertmanager.libsonnet             | 15 +++++++
 jsonnet/kube-prometheus/alerts/tests.yaml     | 44 +++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 jsonnet/kube-prometheus/alerts/tests.yaml

diff --git a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
index c8aba879..42ae3f98 100644
--- a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
+++ b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
@@ -30,6 +30,21 @@
               severity: 'warning',
             },
           },
+          {  // One alert per service while any series' member count differs from the number of series reporting for that service.
+            alert: 'AlertmanagerMembersInconsistent',
+            annotations: {
+              message: 'Alertmanager has not found all other members of the cluster.',
+            },
+            expr: |||
+              count by (service) (
+                alertmanager_cluster_members{%(alertmanagerSelector)s} != on (service) group_left() count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
+              )
+            ||| % $._config,
+            'for': '5m',  // the mismatch has to persist for 5 minutes before the alert fires
+            labels: {
+              severity: 'critical',
+            },
+          },
         ],
       },
     ],
diff --git a/jsonnet/kube-prometheus/alerts/tests.yaml b/jsonnet/kube-prometheus/alerts/tests.yaml
new file mode 100644
index 00000000..8cfc3aa7
--- /dev/null
+++ b/jsonnet/kube-prometheus/alerts/tests.yaml
@@ -0,0 +1,44 @@
+# TODO(metalmatze): This file is temporarily saved here for later reference
+# until we find out how to integrate the tests into our jsonnet stack.
+
+rule_files:
+  - rules.yaml  # NOTE(review): not added by this change; presumably the rendered alert rules - confirm
+
+evaluation_interval: 1m
+
+tests:
+  - interval: 1m  # time between consecutive samples in each `values` list below
+    input_series:
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
+        values: '3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0'  # 3 until 4m, then 2 from 5m, 1 from 11m, 0 from 17m
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
+        values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'  # constant 3, equal to the number of series for this service
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
+        values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'  # constant 3, equal to the number of series for this service
+    alert_rule_test:
+      - eval_time: 5m  # first sample that differs from 3; no exp_alerts, so nothing may be firing yet
+        alertname: AlertmanagerMembersInconsistent
+      - eval_time: 11m  # the first series has been below 3 since 5m, longer than the rule's 5m `for`
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'
+      - eval_time: 17m  # first series now reports 0; the same single alert is still expected
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'
+      - eval_time: 23m  # one interval past the last provided sample (22m)
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'
-- 
GitLab