diff --git a/assets/prometheus/prometheus.yaml b/assets/prometheus/prometheus.yaml
index d48d56483973dc1eef38f90abc80a7848f33bff7..08df4789771f3b48d8769d66e3e0305901bb5357 100644
--- a/assets/prometheus/prometheus.yaml
+++ b/assets/prometheus/prometheus.yaml
@@ -1,3 +1,22 @@
+alerting:
+  alertmanagers:
+  - kubernetes_sd_configs:
+    - role: endpoints
+    relabel_configs:
+    - action: keep
+      regex: alertmanager-main
+      source_labels:
+      - __meta_kubernetes_service_name
+    - action: keep
+      regex: monitoring
+      source_labels:
+      - __meta_kubernetes_namespace
+    - action: keep
+      regex: web
+      source_labels:
+      - __meta_kubernetes_endpoint_port_name
+    scheme: http
+
 global:
   scrape_interval: 15s
   evaluation_interval: 15s
diff --git a/assets/prometheus/rules/kubernetes.rules b/assets/prometheus/rules/kubernetes.rules
index 216c0ccde6f71b762b2488dd22fe563136bb87ce..c0dddb92f652159f742b800945721001f05a9b61 100644
--- a/assets/prometheus/rules/kubernetes.rules
+++ b/assets/prometheus/rules/kubernetes.rules
@@ -286,18 +286,6 @@ ALERT K8SControllerManagerDown
     description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
   }
 
-ALERT K8SMoreThanOneController
-  IF count by (job,cluster) (up{job=~"kube-scheduler|kube-controller-manager"}) > 1
-  FOR 5m
-  LABELS {
-    service = "k8s",
-    severity = "critical",
-  }
-  ANNOTATIONS {
-    summary = "More than one controller node is active",
-    description = "There is more than one {{ $labels.job }} managing the cluster. Cluster behaviour is undefined.",
-  }
-
 ALERT K8SConntrackTableFull
   IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50
   FOR 10m
diff --git a/manifests/prometheus/prometheus-k8s-rules.yaml b/manifests/prometheus/prometheus-k8s-rules.yaml
index c01a7f3e4856ae4fcbf44d2f2fc7dd8589418b33..6e83500ea122092eaea6d8b5f2b276109b910083 100644
--- a/manifests/prometheus/prometheus-k8s-rules.yaml
+++ b/manifests/prometheus/prometheus-k8s-rules.yaml
@@ -341,18 +341,6 @@ data:
         description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
       }
 
-    ALERT K8SMoreThanOneController
-      IF count by (job,cluster) (up{job=~"kube-scheduler|kube-controller-manager"}) > 1
-      FOR 5m
-      LABELS {
-        service = "k8s",
-        severity = "critical",
-      }
-      ANNOTATIONS {
-        summary = "More than one controller node is active",
-        description = "There is more than one {{ $labels.job }} managing the cluster. Cluster behaviour is undefined.",
-      }
-
     ALERT K8SConntrackTableFull
       IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50
       FOR 10m