diff --git a/jsonnet/kube-prometheus/alerts/alerts.libsonnet b/jsonnet/kube-prometheus/alerts/alerts.libsonnet index 3521aa824bb65b0d2d2952237dcfdbd836c42ace..adc461303474c64a86543b64abd6d83c4e9cd04b 100644 --- a/jsonnet/kube-prometheus/alerts/alerts.libsonnet +++ b/jsonnet/kube-prometheus/alerts/alerts.libsonnet @@ -1,4 +1,3 @@ (import 'alertmanager.libsonnet') + (import 'general.libsonnet') + -(import 'node.libsonnet') + -(import 'prometheus-operator.libsonnet') +(import 'node.libsonnet') diff --git a/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet b/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet deleted file mode 100644 index 731994a2fba904c9961dab1d78a2b5f28ef68a2a..0000000000000000000000000000000000000000 --- a/jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet +++ /dev/null @@ -1,63 +0,0 @@ -{ - prometheusAlerts+:: { - groups+: [ - { - name: 'prometheus-operator', - rules: [ - { - alert: 'PrometheusOperatorListErrors', - expr: ||| - (sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4 - ||| % $._config, - labels: { - severity: 'warning', - }, - annotations: { - message: 'Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.', - }, - 'for': '15m', - }, - { - alert: 'PrometheusOperatorWatchErrors', - expr: ||| - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4 - ||| % $._config, - labels: { - severity: 'warning', - }, - annotations: { - message: 'Errors while performing Watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.', - }, - 'for': '15m', - }, - { - alert: 'PrometheusOperatorReconcileErrors', - expr: ||| - rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 - ||| % $._config, - labels: { - severity: 'warning', - }, - annotations: { - message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.', - }, - 'for': '10m', - }, - { - alert: 'PrometheusOperatorNodeLookupErrors', - expr: ||| - rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1 - ||| % $._config, - labels: { - severity: 'warning', - }, - annotations: { - message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.', - }, - 'for': '10m', - }, - ], - }, - ], - }, -} diff --git a/jsonnet/kube-prometheus/jsonnetfile.json b/jsonnet/kube-prometheus/jsonnetfile.json index 30e7f660b8910c119767acfefacaa77238d779e2..4da94526a3ee99e7c45a2aea31214847fe80085f 100644 --- a/jsonnet/kube-prometheus/jsonnetfile.json +++ b/jsonnet/kube-prometheus/jsonnetfile.json @@ -28,6 +28,15 @@ }, "version": "release-0.42" }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus-operator/prometheus-operator", + "subdir": "jsonnet/mixin" + } + }, + "version": "master" + }, { "source": { "git": { diff --git a/jsonnet/kube-prometheus/kube-prometheus.libsonnet b/jsonnet/kube-prometheus/kube-prometheus.libsonnet index 1670ca5ddbba54b44bc1d6675b17a01c11ef47ae..1bdcf4f6c1b38e7e02d2f9a312af5044a445af0c 100644 --- a/jsonnet/kube-prometheus/kube-prometheus.libsonnet +++ b/jsonnet/kube-prometheus/kube-prometheus.libsonnet @@ -10,6 +10,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet'; (import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') + (import './alertmanager/alertmanager.libsonnet') + (import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') + +(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') + (import './prometheus/prometheus.libsonnet') + (import './prometheus-adapter/prometheus-adapter.libsonnet') + (import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') + diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 6c0add96906f087d001541a0e5a8760ecc903454..77843e0fb2d2d0f724891194dbcb758645f182d1 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -4,7 +4,7 @@ { "source": { "git": { - "remote": "https://github.com/brancz/kubernetes-grafana.git", + "remote": "https://github.com/brancz/kubernetes-grafana", "subdir": "grafana" } }, @@ -14,7 +14,7 @@ { "source": { "git": { - "remote": "https://github.com/etcd-io/etcd.git", + "remote": "https://github.com/etcd-io/etcd", "subdir": "Documentation/etcd-mixin" } }, @@ -24,7 +24,7 @@ { "source": { "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", + "remote": "https://github.com/grafana/grafonnet-lib", "subdir": "grafonnet" } }, @@ -34,7 +34,7 @@ { "source": { "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", + "remote": "https://github.com/grafana/jsonnet-libs", "subdir": "grafana-builder" } }, @@ -44,7 +44,7 @@ { "source": { "git": { - "remote": "https://github.com/ksonnet/ksonnet-lib.git", + "remote": "https://github.com/ksonnet/ksonnet-lib", "subdir": "" } }, @@ -55,7 +55,7 @@ { "source": { "git": { - "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", "subdir": "" } }, @@ -65,7 +65,7 @@ { "source": { "git": { - "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", "subdir": "lib/promgrafonnet" } }, @@ -75,7 +75,7 @@ { "source": { "git": { - "remote": "https://github.com/kubernetes/kube-state-metrics.git", + "remote": "https://github.com/kubernetes/kube-state-metrics", "subdir": "jsonnet/kube-state-metrics" } }, @@ -85,7 +85,7 @@ { "source": { "git": { - "remote": "https://github.com/kubernetes/kube-state-metrics.git", + "remote": "https://github.com/kubernetes/kube-state-metrics", "subdir": "jsonnet/kube-state-metrics-mixin" } }, @@ -95,7 +95,17 @@ { "source": { "git": { - "remote": "https://github.com/prometheus-operator/prometheus-operator.git", + "remote": "https://github.com/prometheus-operator/prometheus-operator", + "subdir": "jsonnet/mixin" + } + }, + "version": "64863c320adddf1ab7da9cd0af1bea6978f9a72b", + "sum": "vqz67twCROf5kVgo/61luBOx25Mk7Okbt8YP+/7xjT0=" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus-operator/prometheus-operator", "subdir": "jsonnet/prometheus-operator" } }, @@ -105,7 +115,7 @@ { "source": { "git": { - "remote": "https://github.com/prometheus/node_exporter.git", + "remote": "https://github.com/prometheus/node_exporter", "subdir": "docs/node-mixin" } }, @@ -115,7 +125,7 @@ { "source": { "git": { - "remote": "https://github.com/prometheus/prometheus.git", + "remote": "https://github.com/prometheus/prometheus", "subdir": "documentation/prometheus-mixin" } }, diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 166f21596af3281a1d7a630c2bea792b613396be..0c7e5f250dc454fc06713beefd330eacd1d0212c 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -1044,6 +1044,53 @@ spec: node_md_disks{state="fail"} > 0 labels: severity: warning + - name: prometheus-operator + rules: + - alert: PrometheusOperatorListErrors + annotations: + description: Errors while performing List operations in controller {{$labels.controller}} + in {{$labels.namespace}} namespace. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorlisterrors + summary: Errors while performing list operations in controller. + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4 + for: 15m + labels: + severity: warning + - alert: PrometheusOperatorWatchErrors + annotations: + description: Errors while performing watch operations in controller {{$labels.controller}} + in {{$labels.namespace}} namespace. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorwatcherrors + summary: Errors while performing watch operations in controller. + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4 + for: 15m + labels: + severity: warning + - alert: PrometheusOperatorReconcileErrors + annotations: + description: '{{ $value | humanizePercentage }} of reconciling operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace }} + namespace.' + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorreconcileerrors + summary: Errors while reconciling controller. + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + - alert: PrometheusOperatorNodeLookupErrors + annotations: + description: Errors while reconciling Prometheus in {{ $labels.namespace }} + Namespace. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornodelookuperrors + summary: Errors while reconciling Prometheus. + expr: | + rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 + for: 10m + labels: + severity: warning - name: kubernetes-apps rules: - alert: KubePodCrashLooping @@ -2031,40 +2078,3 @@ spec: for: 2m labels: severity: warning - - name: prometheus-operator - rules: - - alert: PrometheusOperatorListErrors - annotations: - message: Errors while performing List operations in controller {{$labels.controller}} - in {{$labels.namespace}} namespace. - expr: | - (sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4 - for: 15m - labels: - severity: warning - - alert: PrometheusOperatorWatchErrors - annotations: - message: Errors while performing Watch operations in controller {{$labels.controller}} - in {{$labels.namespace}} namespace. - expr: | - (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4 - for: 15m - labels: - severity: warning - - alert: PrometheusOperatorReconcileErrors - annotations: - message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace - }} Namespace. - expr: | - rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 - for: 10m - labels: - severity: warning - - alert: PrometheusOperatorNodeLookupErrors - annotations: - message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace. - expr: | - rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 - for: 10m - labels: - severity: warning