Skip to content
Snippets Groups Projects
Unverified Commit 6f488250 authored by Frederic Branczyk's avatar Frederic Branczyk Committed by GitHub
Browse files

Merge pull request #576 from simonpasquier/fix-alertmanager-config-inconsistent-alert

Fix AlertmanagerConfigInconsistent alert
parents 97ca4616 0a43e859
No related branches found
No related tags found
No related merge requests found
......@@ -7,10 +7,15 @@
{
alert: 'AlertmanagerConfigInconsistent',
annotations: {
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
message: |||
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
|||,
},
expr: |||
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
||| % $._config,
'for': '5m',
labels: {
......
......@@ -1776,10 +1776,13 @@ spec:
rules:
- alert: AlertmanagerConfigInconsistent
annotations:
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
are out of sync.
message: |
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
{{ end }}
expr: |
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})) != 1
for: 5m
labels:
severity: critical
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment