diff --git a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet index bda69d00f3dde050a1b458412a2a8e8a341c2118..bcabf4d9fce152864c3f43be051647c63341b117 100644 --- a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet +++ b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet @@ -7,10 +7,15 @@ { alert: 'AlertmanagerConfigInconsistent', annotations: { - message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.', + message: ||| + The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync. + {{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }} + Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}" + {{ end }} + |||, }, expr: ||| - count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1 + count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1 ||| % $._config, 'for': '5m', labels: { diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 141fc8ab15568415515838a17c74ff00d1c35610..ef5a4dae66928d1903710b684f2f4f60af37b962 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -1776,10 +1776,13 @@ spec: rules: - alert: AlertmanagerConfigInconsistent annotations: - message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` - are out of sync. + message: | + The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync. + {{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }} + Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}" + {{ end }} expr: | - count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1 + count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})) != 1 for: 5m labels: severity: critical