From 49835437f784c28a6c34a5f69bc2b79907d9ff80 Mon Sep 17 00:00:00 2001
From: Matthias Loibl <mail@matthiasloibl.com>
Date: Fri, 26 Oct 2018 16:39:39 +0200
Subject: [PATCH] Add AlertmanagerMembersInconsistent alerting rule

---
 .../alerts/alertmanager.libsonnet         | 22 ++++++++++
 jsonnet/kube-prometheus/alerts/tests.yaml | 44 +++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 jsonnet/kube-prometheus/alerts/tests.yaml

diff --git a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
index c8aba879..42ae3f98 100644
--- a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
+++ b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
@@ -30,6 +30,28 @@
               severity: 'warning',
             },
           },
+          {
+            alert: 'AlertmanagerMembersInconsistent',
+            annotations: {
+              message: 'Alertmanager has not found all other members of the cluster.',
+            },
+            // Counts, per service, the members whose own view of the cluster
+            // size differs from the number of members that report the metric.
+            // group_left() is required because several members share the same
+            // `service` value (many-to-one match); the outer count collapses
+            // the result back to one series per service.
+            expr: |||
+              count by (service) (
+                alertmanager_cluster_members{%(alertmanagerSelector)s}
+                  != on (service) group_left()
+                count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
+              )
+            ||| % $._config,
+            'for': '5m',
+            labels: {
+              severity: 'critical',
+            },
+          },
         ],
       },
     ],
diff --git a/jsonnet/kube-prometheus/alerts/tests.yaml b/jsonnet/kube-prometheus/alerts/tests.yaml
new file mode 100644
index 00000000..8cfc3aa7
--- /dev/null
+++ b/jsonnet/kube-prometheus/alerts/tests.yaml
@@ -0,0 +1,44 @@
+# TODO(metalmatze): This file is temporarily saved here for later reference
+# until we find out how to integrate the tests into our jsonnet stack.
+
+rule_files:
+  - rules.yaml
+
+evaluation_interval: 1m
+
+tests:
+  - interval: 1m
+    input_series:
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
+        values: '3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0'  # falls 3 -> 2 -> 1 -> 0
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
+        values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'  # constant 3
+      - series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
+        values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'  # constant 3
+    alert_rule_test:
+      - eval_time: 5m  # first sample with a mismatch (2 vs 3); no exp_alerts, so nothing may fire yet
+        alertname: AlertmanagerMembersInconsistent
+      - eval_time: 11m  # first series now reports 1
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'
+      - eval_time: 17m  # first series now reports 0
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'
+      - eval_time: 23m  # one interval past the last listed sample
+        alertname: AlertmanagerMembersInconsistent
+        exp_alerts:
+          - exp_labels:
+              service: 'alertmanager-main'
+              severity: critical
+            exp_annotations:
+              message: 'Alertmanager has not found all other members of the cluster.'
-- 
GitLab