From a1afce8707f1c14871147e2dd398551a0040f72c Mon Sep 17 00:00:00 2001
From: Frederic Branczyk <fbranczyk@gmail.com>
Date: Thu, 15 Jun 2017 09:34:59 +0200
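Subject: [PATCH] alerting rules: replace severity levels with actions

The `severity` label is kept, but its values now name the action an
alert requires instead of an abstract level:

  critical -> page
  warning  -> ticket

The DeadMansSwitch alert no longer carries a `severity = "none"` label.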

---
 assets/prometheus/rules/alertmanager.rules            |  6 +++---
 assets/prometheus/rules/general.rules                 | 11 ++++-------
 assets/prometheus/rules/kube-apiserver.rules          |  4 ++--
 assets/prometheus/rules/kube-controller-manager.rules |  2 +-
 assets/prometheus/rules/kube-scheduler.rules          |  2 +-
 assets/prometheus/rules/kubelet.rules                 | 10 +++++-----
 assets/prometheus/rules/node.rules                    |  2 +-
 assets/prometheus/rules/prometheus.rules              |  2 +-
 8 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/assets/prometheus/rules/alertmanager.rules b/assets/prometheus/rules/alertmanager.rules
index 71bdc687..30a70ee3 100644
--- a/assets/prometheus/rules/alertmanager.rules
+++ b/assets/prometheus/rules/alertmanager.rules
@@ -4,7 +4,7 @@ ALERT AlertmanagerConfigInconsistent
        label_replace(prometheus_operator_alertmanager_spec_replicas, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
   FOR 5m
   LABELS {
-    severity = "critical"
+    severity = "page"
   }
   ANNOTATIONS {
     summary = "Alertmanager configurations are inconsistent",
@@ -17,7 +17,7 @@ ALERT AlertmanagerDownOrMissing
        sum by(job) (up) != 1
   FOR 5m
   LABELS {
-    severity = "warning"
+    severity = "ticket"
   }
   ANNOTATIONS {
     summary = "Alertmanager down or not discovered",
@@ -28,7 +28,7 @@ ALERT FailedReload
   IF alertmanager_config_last_reload_successful == 0
   FOR 10m
   LABELS {
-    severity = "warning"
+    severity = "ticket"
   }
   ANNOTATIONS {
     summary = "Alertmanager configuration reload has failed",
diff --git a/assets/prometheus/rules/general.rules b/assets/prometheus/rules/general.rules
index 9e26ab9a..7b406f07 100644
--- a/assets/prometheus/rules/general.rules
+++ b/assets/prometheus/rules/general.rules
@@ -4,7 +4,7 @@ Alert TargetDown
   IF 100 * (count(up == 0) / count(up)) > 3
   FOR 10m
   LABELS {
-    severity = "warning"
+    severity = "ticket"
   }
   ANNOTATIONS {
     summary = "Targets are down",
@@ -15,9 +15,6 @@ Alert TargetDown
 
 ALERT DeadMansSwitch
   IF vector(1)
-  LABELS {
-    severity = "none",
-  }
   ANNOTATIONS {
     summary = "Alerting DeadMansSwitch",
     description = "This is a DeadMansSwitch meant to ensure that the entire Alerting pipeline is functional.",
@@ -29,7 +26,7 @@ ALERT TooManyOpenFileDescriptors
   IF 100 * (process_open_fds / process_max_fds) > 95
   FOR 10m
   LABELS {
-    severity = "critical"
+    severity = "page"
   }
   ANNOTATIONS {
     summary = "too many open file descriptors",
@@ -43,7 +40,7 @@ ALERT FdExhaustionClose
   IF predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1
   FOR 10m
   LABELS {
-    severity = "warning"
+    severity = "ticket"
   }
   ANNOTATIONS {
     summary = "file descriptors soon exhausted",
@@ -55,7 +52,7 @@ ALERT FdExhaustionClose
   IF predict_linear(instance:fd_utilization[10m], 3600) > 1
   FOR 10m
   LABELS {
-    severity = "critical"
+    severity = "page"
   }
   ANNOTATIONS {
     summary = "file descriptors soon exhausted",
diff --git a/assets/prometheus/rules/kube-apiserver.rules b/assets/prometheus/rules/kube-apiserver.rules
index c041881a..be6dc97f 100644
--- a/assets/prometheus/rules/kube-apiserver.rules
+++ b/assets/prometheus/rules/kube-apiserver.rules
@@ -2,7 +2,7 @@ ALERT K8SApiserverDown
   IF absent({job="apiserver"}) or (count by(cluster) (up{job="apiserver"} == 1) < count by(cluster) (up{job="apiserver"}))
   FOR 5m
   LABELS {
-    severity = "critical"
+    severity = "page"
   }
   ANNOTATIONS {
     summary = "API server unreachable",
@@ -20,7 +20,7 @@ ALERT K8SApiServerLatency
     ) / 1e6 > 1.0
   FOR 10m
   LABELS {
-    severity = "warning"
+    severity = "ticket"
   }
   ANNOTATIONS {
     summary = "Kubernetes apiserver latency is high",
diff --git a/assets/prometheus/rules/kube-controller-manager.rules b/assets/prometheus/rules/kube-controller-manager.rules
index f75e2768..90546273 100644
--- a/assets/prometheus/rules/kube-controller-manager.rules
+++ b/assets/prometheus/rules/kube-controller-manager.rules
@@ -2,7 +2,7 @@ ALERT K8SControllerManagerDown
   IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
   FOR 5m
   LABELS {
-    severity = "critical",
+    severity = "page",
   }
   ANNOTATIONS {
     summary = "Controller manager is down",
diff --git a/assets/prometheus/rules/kube-scheduler.rules b/assets/prometheus/rules/kube-scheduler.rules
index 6eff4bcd..80e954dd 100644
--- a/assets/prometheus/rules/kube-scheduler.rules
+++ b/assets/prometheus/rules/kube-scheduler.rules
@@ -2,7 +2,7 @@ ALERT K8SSchedulerDown
   IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
   FOR 5m
   LABELS {
-    severity = "critical",
+    severity = "page",
   }
   ANNOTATIONS {
     summary = "Scheduler is down",
diff --git a/assets/prometheus/rules/kubelet.rules b/assets/prometheus/rules/kubelet.rules
index cbcd576c..124d8dd0 100644
--- a/assets/prometheus/rules/kubelet.rules
+++ b/assets/prometheus/rules/kubelet.rules
@@ -2,7 +2,7 @@ ALERT K8SNodeNotReady
   IF kube_node_status_ready{condition="true"} == 0
   FOR 1h
   LABELS {
-    severity = "warning",
+    severity = "ticket",
   }
   ANNOTATIONS {
     summary = "Node status is NotReady",
@@ -20,7 +20,7 @@ ALERT K8SManyNodesNotReady
       ) > 0.2
   FOR 1m
   LABELS {
-    severity = "critical",
+    severity = "page",
   }
   ANNOTATIONS {
     summary = "Many K8s nodes are Not Ready",
@@ -31,7 +31,7 @@ ALERT K8SKubeletDown
   IF count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.03
   FOR 1h
   LABELS {
-    severity = "warning",
+    severity = "ticket",
   }
   ANNOTATIONS {
     summary = "Many Kubelets cannot be scraped",
@@ -42,7 +42,7 @@ ALERT K8SKubeletDown
   IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
   FOR 1h
   LABELS {
-    severity = "critical",
+    severity = "page",
   }
   ANNOTATIONS {
     summary = "Many Kubelets cannot be scraped",
@@ -52,7 +52,7 @@ ALERT K8SKubeletDown
 ALERT K8SKubeletTooManyPods
   IF kubelet_running_pod_count > 100
   LABELS {
-    severity = "warning",
+    severity = "ticket",
   }
   ANNOTATIONS {
     summary = "Kubelet is close to pod limit",
diff --git a/assets/prometheus/rules/node.rules b/assets/prometheus/rules/node.rules
index 8fd5b7d0..9844947a 100644
--- a/assets/prometheus/rules/node.rules
+++ b/assets/prometheus/rules/node.rules
@@ -2,7 +2,7 @@ ALERT NodeExporterDown
   IF up{job="node-exporter"} == 0
   FOR 10m
   LABELS {
-    severity = "warning"
+    severity = "ticket"
   }
   ANNOTATIONS {
     summary = "node-exporter cannot be scraped",
diff --git a/assets/prometheus/rules/prometheus.rules b/assets/prometheus/rules/prometheus.rules
index 05c278f1..c29ed6ca 100644
--- a/assets/prometheus/rules/prometheus.rules
+++ b/assets/prometheus/rules/prometheus.rules
@@ -2,7 +2,7 @@ ALERT FailedReload
   IF prometheus_config_last_reload_successful == 0
   FOR 10m
   LABELS {
-    severity = "warning"
+    severity = "ticket"
   }
   ANNOTATIONS {
     summary = "Prometheus configuration reload has failed",
-- 
GitLab