From 8b6ee5c18b9323888cc1c146beed819984656f92 Mon Sep 17 00:00:00 2001
From: Alexander Holte-Davidsen <alexander@kit.no>
Date: Mon, 5 Mar 2018 09:52:51 +0100
Subject: [PATCH] Add summary to alerting rules where missing - updated
 according to guidelines

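Each alerting rule now carries a short summary annotation alongside its
longer description, as the guidelines ask. A minimal sketch of the
resulting pattern (the ExampleAlert rule below is hypothetical, shown
only to illustrate the annotation layout, not part of this patch):

  - alert: ExampleAlert  # hypothetical rule, for illustration only
    expr: up{job="example"} == 0
    for: 10m
    labels:
      severity: warning
    annotations:
      description: Instance {{ $labels.instance }} has been down for more
        than 10 minutes.
      summary: Instance down
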
---
 assets/prometheus/rules/alertmanager.rules.yaml |  3 +++
 assets/prometheus/rules/kubelet.rules.yaml      |  1 +
 assets/prometheus/rules/kubernetes.rules.yaml   |  6 ++++++
 assets/prometheus/rules/node.rules.yaml         |  2 ++
 assets/prometheus/rules/prometheus.rules.yaml   |  5 +++++
 manifests/prometheus/prometheus-k8s-rules.yaml  | 17 +++++++++++++++++
 6 files changed, 34 insertions(+)

diff --git a/assets/prometheus/rules/alertmanager.rules.yaml b/assets/prometheus/rules/alertmanager.rules.yaml
index fdfdfd0f..5e51f75b 100644
--- a/assets/prometheus/rules/alertmanager.rules.yaml
+++ b/assets/prometheus/rules/alertmanager.rules.yaml
@@ -11,6 +11,7 @@ groups:
     annotations:
       description: The configuration of the instances of the Alertmanager cluster
         `{{$labels.service}}` are out of sync.
+      summary: Alertmanager configurations out of sync
   - alert: AlertmanagerDownOrMissing
     expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1",
       "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
@@ -20,6 +21,7 @@ groups:
     annotations:
       description: An unexpected number of Alertmanagers are scraped or Alertmanagers
         disappeared from discovery.
+      summary: Alertmanager down or missing
   - alert: AlertmanagerFailedReload
     expr: alertmanager_config_last_reload_successful == 0
     for: 10m
@@ -28,3 +30,4 @@ groups:
     annotations:
       description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
         }}/{{ $labels.pod}}.
+      summary: Alertmanager's configuration reload failed
diff --git a/assets/prometheus/rules/kubelet.rules.yaml b/assets/prometheus/rules/kubelet.rules.yaml
index 0edd7878..85547dd6 100644
--- a/assets/prometheus/rules/kubelet.rules.yaml
+++ b/assets/prometheus/rules/kubelet.rules.yaml
@@ -26,6 +26,7 @@ groups:
       severity: warning
     annotations:
       description: Prometheus failed to scrape {{ $value }}% of kubelets.
+      summary: Prometheus failed to scrape kubelets
   - alert: K8SKubeletDown
     expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
       * 100 > 10
diff --git a/assets/prometheus/rules/kubernetes.rules.yaml b/assets/prometheus/rules/kubernetes.rules.yaml
index f961ce6b..288841b7 100644
--- a/assets/prometheus/rules/kubernetes.rules.yaml
+++ b/assets/prometheus/rules/kubernetes.rules.yaml
@@ -51,6 +51,7 @@ groups:
     annotations:
       description: the API server has a 99th percentile latency of {{ $value }} seconds
         for {{$labels.verb}} {{$labels.resource}}
+      summary: API server high latency
   - alert: APIServerLatencyHigh
     expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"}
       > 4
@@ -60,6 +61,7 @@ groups:
     annotations:
       description: the API server has a 99th percentile latency of {{ $value }} seconds
         for {{$labels.verb}} {{$labels.resource}}
+      summary: API server high latency
   - alert: APIServerErrorsHigh
     expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
       * 100 > 2
@@ -68,6 +70,7 @@ groups:
       severity: warning
     annotations:
       description: API server returns errors for {{ $value }}% of requests
+      summary: API server request errors
   - alert: APIServerErrorsHigh
     expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
       * 100 > 5
@@ -84,12 +87,14 @@ groups:
     annotations:
       description: No API servers are reachable or all have disappeared from service
         discovery
+      summary: No API servers are reachable
 
   - alert: K8sCertificateExpirationNotice
     labels:
       severity: warning
     annotations:
       description: Kubernetes API Certificate is expiring soon (less than 7 days)
+      summary: Kubernetes API Certificate is expiring soon
     expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0
 
   - alert: K8sCertificateExpirationNotice
@@ -97,4 +102,5 @@ groups:
       severity: critical
     annotations:
       description: Kubernetes API Certificate is expiring in less than 1 day
+      summary: Kubernetes API Certificate is expiring
     expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0
diff --git a/assets/prometheus/rules/node.rules.yaml b/assets/prometheus/rules/node.rules.yaml
index 0e7e1bbd..d14f0870 100644
--- a/assets/prometheus/rules/node.rules.yaml
+++ b/assets/prometheus/rules/node.rules.yaml
@@ -26,6 +26,7 @@ groups:
     annotations:
       description: Prometheus could not scrape a node-exporter for more than 10m,
         or node-exporters have disappeared from discovery
+      summary: Prometheus could not scrape a node-exporter
   - alert: NodeDiskRunningFull
     expr: predict_linear(node_filesystem_free[6h], 3600 * 24) < 0
     for: 30m
@@ -42,3 +43,4 @@ groups:
     annotations:
       description: device {{$labels.device}} on node {{$labels.instance}} is running
         full within the next 2 hours (mounted at {{$labels.mountpoint}})
+      summary: Node disk is running full
diff --git a/assets/prometheus/rules/prometheus.rules.yaml b/assets/prometheus/rules/prometheus.rules.yaml
index e006ba9b..43f2808c 100644
--- a/assets/prometheus/rules/prometheus.rules.yaml
+++ b/assets/prometheus/rules/prometheus.rules.yaml
@@ -8,6 +8,7 @@ groups:
       severity: warning
     annotations:
       description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
+      summary: Reloading Prometheus' configuration failed
 
   - alert: PrometheusNotificationQueueRunningFull
     expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) > prometheus_notifications_queue_capacity
@@ -17,6 +18,7 @@ groups:
     annotations:
       description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{
         $labels.pod}}
+      summary: Prometheus' alert notification queue is running full
 
   - alert: PrometheusErrorSendingAlerts
     expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
@@ -27,6 +29,7 @@ groups:
     annotations:
       description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
         $labels.pod}} to Alertmanager {{$labels.Alertmanager}}
+      summary: Errors while sending alerts from Prometheus
 
   - alert: PrometheusErrorSendingAlerts
     expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
@@ -37,6 +40,7 @@ groups:
     annotations:
       description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
         $labels.pod}} to Alertmanager {{$labels.Alertmanager}}
+      summary: Errors while sending alerts from Prometheus
 
   - alert: PrometheusNotConnectedToAlertmanagers
     expr: prometheus_notifications_alertmanagers_discovered < 1
@@ -46,6 +50,7 @@ groups:
     annotations:
       description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
         to any Alertmanagers
+      summary: Prometheus is not connected to any Alertmanagers
 
   - alert: PrometheusTSDBReloadsFailing
     expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0
diff --git a/manifests/prometheus/prometheus-k8s-rules.yaml b/manifests/prometheus/prometheus-k8s-rules.yaml
index c7cb14ac..05368dc1 100644
--- a/manifests/prometheus/prometheus-k8s-rules.yaml
+++ b/manifests/prometheus/prometheus-k8s-rules.yaml
@@ -20,6 +20,7 @@ data:
         annotations:
           description: The configuration of the instances of the Alertmanager cluster
             `{{$labels.service}}` are out of sync.
+          summary: Alertmanager configurations out of sync
       - alert: AlertmanagerDownOrMissing
         expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1",
           "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
@@ -29,6 +30,7 @@ data:
         annotations:
           description: An unexpected number of Alertmanagers are scraped or Alertmanagers
             disappeared from discovery.
+          summary: Alertmanager down or missing
       - alert: AlertmanagerFailedReload
         expr: alertmanager_config_last_reload_successful == 0
         for: 10m
@@ -37,6 +39,7 @@ data:
         annotations:
           description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
             }}/{{ $labels.pod}}.
+          summary: Alertmanager's configuration reload failed
   etcd3.rules.yaml: |+
     groups:
     - name: ./etcd3.rules
@@ -363,6 +366,7 @@ data:
           severity: warning
         annotations:
           description: Prometheus failed to scrape {{ $value }}% of kubelets.
+          summary: Prometheus failed to scrape kubelets
       - alert: K8SKubeletDown
         expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
           * 100 > 10
@@ -436,6 +440,7 @@ data:
         annotations:
           description: the API server has a 99th percentile latency of {{ $value }} seconds
             for {{$labels.verb}} {{$labels.resource}}
+          summary: API server high latency
       - alert: APIServerLatencyHigh
         expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"}
           > 4
@@ -445,6 +450,7 @@ data:
         annotations:
           description: the API server has a 99th percentile latency of {{ $value }} seconds
             for {{$labels.verb}} {{$labels.resource}}
+          summary: API server high latency
       - alert: APIServerErrorsHigh
         expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
           * 100 > 2
@@ -453,6 +459,7 @@ data:
           severity: warning
         annotations:
           description: API server returns errors for {{ $value }}% of requests
+          summary: API server request errors
       - alert: APIServerErrorsHigh
         expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
           * 100 > 5
@@ -469,12 +476,14 @@ data:
         annotations:
           description: No API servers are reachable or all have disappeared from service
             discovery
+          summary: No API servers are reachable
     
       - alert: K8sCertificateExpirationNotice
         labels:
           severity: warning
         annotations:
           description: Kubernetes API Certificate is expiring soon (less than 7 days)
+          summary: Kubernetes API Certificate is expiring soon
         expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0
     
       - alert: K8sCertificateExpirationNotice
@@ -482,6 +491,7 @@ data:
           severity: critical
         annotations:
           description: Kubernetes API Certificate is expiring in less than 1 day
+          summary: Kubernetes API Certificate is expiring
         expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0
   node.rules.yaml: |+
     groups:
@@ -512,6 +522,7 @@ data:
         annotations:
           description: Prometheus could not scrape a node-exporter for more than 10m,
             or node-exporters have disappeared from discovery
+          summary: Prometheus could not scrape a node-exporter
       - alert: NodeDiskRunningFull
         expr: predict_linear(node_filesystem_free[6h], 3600 * 24) < 0
         for: 30m
@@ -528,6 +539,7 @@ data:
         annotations:
           description: device {{$labels.device}} on node {{$labels.instance}} is running
             full within the next 2 hours (mounted at {{$labels.mountpoint}})
+          summary: Node disk is running full
   prometheus.rules.yaml: |+
     groups:
     - name: prometheus.rules
@@ -539,6 +551,7 @@ data:
           severity: warning
         annotations:
           description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
+          summary: Reloading Prometheus' configuration failed
     
       - alert: PrometheusNotificationQueueRunningFull
         expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) > prometheus_notifications_queue_capacity
@@ -548,6 +561,7 @@ data:
         annotations:
           description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{
             $labels.pod}}
+          summary: Prometheus' alert notification queue is running full
     
       - alert: PrometheusErrorSendingAlerts
         expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
@@ -558,6 +572,7 @@ data:
         annotations:
           description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
             $labels.pod}} to Alertmanager {{$labels.Alertmanager}}
+          summary: Errors while sending alerts from Prometheus
     
       - alert: PrometheusErrorSendingAlerts
         expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
@@ -568,6 +583,7 @@ data:
         annotations:
           description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
             $labels.pod}} to Alertmanager {{$labels.Alertmanager}}
+          summary: Errors while sending alerts from Prometheus
     
       - alert: PrometheusNotConnectedToAlertmanagers
         expr: prometheus_notifications_alertmanagers_discovered < 1
@@ -577,6 +593,7 @@ data:
         annotations:
           description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
             to any Alertmanagers
+          summary: Prometheus is not connected to any Alertmanagers
     
       - alert: PrometheusTSDBReloadsFailing
         expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0
-- 
GitLab