Skip to content
Snippets Groups Projects
Commit 915677ea authored by Frederic Branczyk, committed by GitHub
Browse files

Revert "alerting rules: replace severity with action"

parent a1afce87
No related branches found
No related tags found
No related merge requests found
......@@ -4,7 +4,7 @@ ALERT AlertmanagerConfigInconsistent
label_replace(prometheus_operator_alertmanager_spec_replicas, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
FOR 5m
LABELS {
- severity = "page"
+ severity = "critical"
}
ANNOTATIONS {
summary = "Alertmanager configurations are inconsistent",
......@@ -17,7 +17,7 @@ ALERT AlertmanagerDownOrMissing
sum by(job) (up) != 1
FOR 5m
LABELS {
- severity = "ticket"
+ severity = "warning"
}
ANNOTATIONS {
summary = "Alertmanager down or not discovered",
......@@ -28,7 +28,7 @@ ALERT FailedReload
IF alertmanager_config_last_reload_successful == 0
FOR 10m
LABELS {
- severity = "ticket"
+ severity = "warning"
}
ANNOTATIONS {
summary = "Alertmanager configuration reload has failed",
......
......@@ -4,7 +4,7 @@ Alert TargetDown
IF 100 * (count(up == 0) / count(up)) > 3
FOR 10m
LABELS {
- severity = "ticket"
+ severity = "warning"
}
ANNOTATIONS {
summary = "Targets are down",
......@@ -15,6 +15,9 @@ Alert TargetDown
ALERT DeadMansSwitch
IF vector(1)
+ LABELS {
+ severity = "none",
+ }
ANNOTATIONS {
summary = "Alerting DeadMansSwitch",
description = "This is a DeadMansSwitch meant to ensure that the entire Alerting pipeline is functional.",
......@@ -26,7 +29,7 @@ ALERT TooManyOpenFileDescriptors
IF 100 * (process_open_fds / process_max_fds) > 95
FOR 10m
LABELS {
- severity = "page"
+ severity = "critical"
}
ANNOTATIONS {
summary = "too many open file descriptors",
......@@ -40,7 +43,7 @@ ALERT FdExhaustionClose
IF predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1
FOR 10m
LABELS {
- severity = "ticket"
+ severity = "warning"
}
ANNOTATIONS {
summary = "file descriptors soon exhausted",
......@@ -52,7 +55,7 @@ ALERT FdExhaustionClose
IF predict_linear(instance:fd_utilization[10m], 3600) > 1
FOR 10m
LABELS {
- severity = "page"
+ severity = "critical"
}
ANNOTATIONS {
summary = "file descriptors soon exhausted",
......
......@@ -2,7 +2,7 @@ ALERT K8SApiserverDown
IF absent({job="apiserver"}) or (count by(cluster) (up{job="apiserver"} == 1) < count by(cluster) (up{job="apiserver"}))
FOR 5m
LABELS {
- severity = "page"
+ severity = "critical"
}
ANNOTATIONS {
summary = "API server unreachable",
......@@ -20,7 +20,7 @@ ALERT K8SApiServerLatency
) / 1e6 > 1.0
FOR 10m
LABELS {
- severity = "ticket"
+ severity = "warning"
}
ANNOTATIONS {
summary = "Kubernetes apiserver latency is high",
......
......@@ -2,7 +2,7 @@ ALERT K8SControllerManagerDown
IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
FOR 5m
LABELS {
- severity = "page",
+ severity = "critical",
}
ANNOTATIONS {
summary = "Controller manager is down",
......
......@@ -2,7 +2,7 @@ ALERT K8SSchedulerDown
IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
FOR 5m
LABELS {
- severity = "page",
+ severity = "critical",
}
ANNOTATIONS {
summary = "Scheduler is down",
......
......@@ -2,7 +2,7 @@ ALERT K8SNodeNotReady
IF kube_node_status_ready{condition="true"} == 0
FOR 1h
LABELS {
- severity = "ticket",
+ severity = "warning",
}
ANNOTATIONS {
summary = "Node status is NotReady",
......@@ -20,7 +20,7 @@ ALERT K8SManyNodesNotReady
) > 0.2
FOR 1m
LABELS {
- severity = "page",
+ severity = "critical",
}
ANNOTATIONS {
summary = "Many K8s nodes are Not Ready",
......@@ -31,7 +31,7 @@ ALERT K8SKubeletDown
IF count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.03
FOR 1h
LABELS {
- severity = "ticket",
+ severity = "warning",
}
ANNOTATIONS {
summary = "Many Kubelets cannot be scraped",
......@@ -42,7 +42,7 @@ ALERT K8SKubeletDown
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
- severity = "page",
+ severity = "critical",
}
ANNOTATIONS {
summary = "Many Kubelets cannot be scraped",
......@@ -52,7 +52,7 @@ ALERT K8SKubeletDown
ALERT K8SKubeletTooManyPods
IF kubelet_running_pod_count > 100
LABELS {
- severity = "ticket",
+ severity = "warning",
}
ANNOTATIONS {
summary = "Kubelet is close to pod limit",
......
......@@ -2,7 +2,7 @@ ALERT NodeExporterDown
IF up{job="node-exporter"} == 0
FOR 10m
LABELS {
- severity = "ticket"
+ severity = "warning"
}
ANNOTATIONS {
summary = "node-exporter cannot be scraped",
......
......@@ -2,7 +2,7 @@ ALERT FailedReload
IF prometheus_config_last_reload_successful == 0
FOR 10m
LABELS {
- severity = "ticket"
+ severity = "warning"
}
ANNOTATIONS {
summary = "Prometheus configuration reload has failed",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment