Skip to content
Snippets Groups Projects
Commit acd8924d authored by Matthias Loibl
Browse files

Add triggered_total metric to alertmanager controller

Update client_golang for wrappable registerer
parent f855d9ca
No related branches found
No related tags found
No related merge requests found
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.', message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
}, },
expr: ||| expr: |||
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas{%(prometheusOperatorSelector)s}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1 count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
||| % $._config, ||| % $._config,
'for': '5m', 'for': '5m',
labels: { labels: {
......
...@@ -5,28 +5,15 @@ ...@@ -5,28 +5,15 @@
name: 'prometheus-operator', name: 'prometheus-operator',
rules: [ rules: [
{ {
alert: 'PrometheusOperatorAlertmanagerReconcileErrors', alert: 'PrometheusOperatorReconcileErrors',
expr: ||| expr: |||
rate(prometheus_operator_alertmanager_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
||| % $._config, ||| % $._config,
labels: { labels: {
severity: 'warning', severity: 'warning',
}, },
annotations: { annotations: {
message: 'Errors while reconciling Alertmanager in {{ $labels.namespace }} Namespace.', message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.',
},
'for': '10m',
},
{
alert: 'PrometheusOperatorPrometheusReconcileErrors',
expr: |||
rate(prometheus_operator_prometheus_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.',
}, },
'for': '10m', 'for': '10m',
}, },
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
} }
}, },
"version": "049c48c931bfb3cd72efd313b7a47d2244456db0" "version": "d874b5bc21649dd9d07ab42dd3bdea515038953e"
}, },
{ {
"name": "ksonnet", "name": "ksonnet",
......
...@@ -947,7 +947,7 @@ spec: ...@@ -947,7 +947,7 @@ spec:
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
are out of sync. are out of sync.
expr: | expr: |
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas{job="prometheus-operator"}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1 count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator"}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
for: 5m for: 5m
labels: labels:
severity: critical severity: critical
...@@ -1099,20 +1099,12 @@ spec: ...@@ -1099,20 +1099,12 @@ spec:
severity: warning severity: warning
- name: prometheus-operator - name: prometheus-operator
rules: rules:
- alert: PrometheusOperatorAlertmanagerReconcileErrors - alert: PrometheusOperatorReconcileErrors
annotations: annotations:
message: Errors while reconciling Alertmanager in {{ $labels.namespace }} message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace
Namespace. }} Namespace.
expr: | expr: |
rate(prometheus_operator_alertmanager_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1 rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorPrometheusReconcileErrors
annotations:
message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
expr: |
rate(prometheus_operator_prometheus_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1
for: 10m for: 10m
labels: labels:
severity: warning severity: warning
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment