From acd8924d5789286864418ca17d9ba72b2aac5a13 Mon Sep 17 00:00:00 2001 From: Matthias Loibl <mail@matthiasloibl.com> Date: Mon, 8 Oct 2018 15:29:18 +0200 Subject: [PATCH] Add triggered_total metric to alertmanager controller Update client_golang for wrappable registerer --- .../alerts/alertmanager.libsonnet | 2 +- .../alerts/prometheus-operator.libsonnet | 19 +++---------------- jsonnetfile.lock.json | 2 +- manifests/prometheus-rules.yaml | 18 +++++------------- 4 files changed, 10 insertions(+), 31 deletions(-) diff --git a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet index 87363b2b..c2e440c5 100644 --- a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet +++ b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet @@ -10,7 +10,7 @@ message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.', }, expr: ||| - count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas{%(prometheusOperatorSelector)s}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1 + count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1 ||| % $._config, 'for': '5m', labels: { diff --git a/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet b/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet index f851caa0..a430c505 100644 --- a/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet +++ b/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet @@ -5,28 +5,15 @@ name: 'prometheus-operator', rules: [ { - alert: 'PrometheusOperatorAlertmanagerReconcileErrors', + alert: 'PrometheusOperatorReconcileErrors', expr: ||| - rate(prometheus_operator_alertmanager_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 + rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 ||| % $._config, labels: { severity: 'warning', }, annotations: { - message: 'Errors while reconciling Alertmanager in {{ $labels.namespace }} Namespace.', - }, - 'for': '10m', - }, - { - alert: 'PrometheusOperatorPrometheusReconcileErrors', - expr: ||| - rate(prometheus_operator_prometheus_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 - ||| % $._config, - labels: { - severity: 'warning', - }, - annotations: { - message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.', + message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.', }, 'for': '10m', }, diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 443c00a7..8ed8c60e 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" } }, - "version": "049c48c931bfb3cd72efd313b7a47d2244456db0" + "version": "d874b5bc21649dd9d07ab42dd3bdea515038953e" }, { "name": "ksonnet", diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index db10ae31..519997c6 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -947,7 +947,7 @@ spec: message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync. expr: | - count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas{job="prometheus-operator"}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1 + count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator"}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1 for: 5m labels: severity: critical @@ -1099,20 +1099,12 @@ spec: severity: warning - name: prometheus-operator rules: - - alert: PrometheusOperatorAlertmanagerReconcileErrors + - alert: PrometheusOperatorReconcileErrors annotations: - message: Errors while reconciling Alertmanager in {{ $labels.namespace }} - Namespace. + message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace + }} Namespace. expr: | - rate(prometheus_operator_alertmanager_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1 - for: 10m - labels: - severity: warning - - alert: PrometheusOperatorPrometheusReconcileErrors - annotations: - message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace. - expr: | - rate(prometheus_operator_prometheus_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1 + rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1 for: 10m labels: severity: warning -- GitLab