From af5fb9ee09ecbc33a69b6aae19d85f710d571822 Mon Sep 17 00:00:00 2001
From: Sergiusz Urbaniak <sergiusz.urbaniak@gmail.com>
Date: Thu, 23 Aug 2018 13:38:13 +0200
Subject: [PATCH] kubernetes-prometheus: regenerate

Signed-off-by: Sergiusz Urbaniak <sergiusz.urbaniak@gmail.com>
---
 manifests/grafana-dashboardDefinitions.yaml | 32 ++++++++++-----------
 manifests/node-exporter-daemonset.yaml      |  2 +-
 manifests/prometheus-rules.yaml             | 21 ++++++++++----
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml
index af2b2b0a..db2d0939 100644
--- a/manifests/grafana-dashboardDefinitions.yaml
+++ b/manifests/grafana-dashboardDefinitions.yaml
@@ -1920,7 +1920,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": ":node_cpu_utilisation:avg1m",
+                                  "expr": "1 - avg(rate(node_cpu{mode=\"idle\"}[1m]))",
                                   "format": "time_series",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -2172,7 +2172,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": ":node_memory_utilisation:",
+                                  "expr": "1 - sum(:node_memory_MemFreeCachedBuffers:sum) / sum(:node_memory_MemTotal:sum)",
                                   "format": "time_series",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -2256,7 +2256,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(kube_pod_container_resource_requests_memory_bytes) / sum(node_memory_MemTotal)",
+                                  "expr": "sum(kube_pod_container_resource_requests_memory_bytes) / sum(:node_memory_MemTotal:sum)",
                                   "format": "time_series",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -2340,7 +2340,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(kube_pod_container_resource_limits_memory_bytes) / sum(node_memory_MemTotal)",
+                                  "expr": "sum(kube_pod_container_resource_limits_memory_bytes) / sum(:node_memory_MemTotal:sum)",
                                   "format": "time_series",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -2435,7 +2435,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(irate(container_cpu_usage_seconds_total[1m])) by (namespace)",
+                                  "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate) by (namespace)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{namespace}}",
@@ -2664,7 +2664,7 @@ items:
                           ],
                           "targets": [
                               {
-                                  "expr": "sum(rate(container_cpu_usage_seconds_total[5m])) by (namespace)",
+                                  "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate) by (namespace)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -2682,7 +2682,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(rate(container_cpu_usage_seconds_total[5m])) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores) by (namespace)",
+                                  "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores) by (namespace)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -2700,7 +2700,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(rate(container_cpu_usage_seconds_total[5m])) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores) by (namespace)",
+                                  "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores) by (namespace)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -3247,7 +3247,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$namespace\"}[1m])) by (pod_name)",
+                                  "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\"}) by (pod_name)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{pod_name}}",
@@ -3476,7 +3476,7 @@ items:
                           ],
                           "targets": [
                               {
-                                  "expr": "sum(label_replace(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\"}[5m]), \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)",
+                                  "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -3494,7 +3494,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(label_replace(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\"}[5m]), \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\"}) by (pod)",
+                                  "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\"}) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -3512,7 +3512,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(label_replace(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\"}[5m]), \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\"}) by (pod)",
+                                  "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\"}) by (pod)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -4086,7 +4086,7 @@ items:
                           "steppedLine": false,
                           "targets": [
                               {
-                                  "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}[1m])) by (container_name)",
+                                  "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}) by (container_name)",
                                   "format": "time_series",
                                   "intervalFactor": 2,
                                   "legendFormat": "{{container_name}}",
@@ -4315,7 +4315,7 @@ items:
                           ],
                           "targets": [
                               {
-                                  "expr": "sum(label_replace(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}[5m]), \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)",
+                                  "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -4333,7 +4333,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(label_replace(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=\"$pod\"}[5m]), \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+                                  "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
@@ -4351,7 +4351,7 @@ items:
                                   "step": 10
                               },
                               {
-                                  "expr": "sum(label_replace(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=\"$pod\"}[5m]), \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+                                  "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
                                   "format": "table",
                                   "instant": true,
                                   "intervalFactor": 2,
diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml
index a2669187..b3febf8c 100644
--- a/manifests/node-exporter-daemonset.yaml
+++ b/manifests/node-exporter-daemonset.yaml
@@ -76,5 +76,5 @@ spec:
           path: /sys
         name: sys
       - hostPath:
-          path: /root
+          path: /
         name: root
diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml
index bf07b9a4..121b974f 100644
--- a/manifests/prometheus-rules.yaml
+++ b/manifests/prometheus-rules.yaml
@@ -13,6 +13,11 @@ spec:
     - expr: |
         sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m])) by (namespace)
       record: namespace:container_cpu_usage_seconds_total:sum_rate
+    - expr: |
+        sum by (namespace, pod_name, container_name) (
+          rate(container_cpu_usage_seconds_total{job="kubelet", image!=""}[5m])
+        )
+      record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
     - expr: |
         sum(container_memory_usage_bytes{job="kubelet", image!=""}) by (namespace)
       record: namespace:container_memory_usage_bytes:sum
@@ -151,6 +156,12 @@ spec:
         /
         sum(node_memory_MemTotal{job="node-exporter"})
       record: ':node_memory_utilisation:'
+    - expr: |
+        sum(node_memory_MemFree{job="node-exporter"} + node_memory_Cached{job="node-exporter"} + node_memory_Buffers{job="node-exporter"})
+      record: :node_memory_MemFreeCachedBuffers:sum
+    - expr: |
+        sum(node_memory_MemTotal{job="node-exporter"})
+      record: :node_memory_MemTotal:sum
     - expr: |
         sum by (node) (
           (node_memory_MemFree{job="node-exporter"} + node_memory_Cached{job="node-exporter"} + node_memory_Buffers{job="node-exporter"})
@@ -796,10 +807,10 @@ spec:
           }}' is experiencing {{ printf "%0.0f" $value }}% errors.'
         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
       expr: |
-        sum(rate(rest_client_requests_total{code!~"2.."}[5m])) by (instance, job) * 100
+        (sum(rate(rest_client_requests_total{code!~"2..|404"}[5m])) by (instance, job)
           /
-        sum(rate(rest_client_requests_total[5m])) by (instance, job)
-          > 1
+        sum(rate(rest_client_requests_total[5m])) by (instance, job))
+        * 100 > 1
       for: 15m
       labels:
         severity: warning
@@ -829,7 +840,7 @@ spec:
           for {{$labels.verb}} {{$labels.resource}}.
         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
       expr: |
-        cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
+        cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
       for: 10m
       labels:
         severity: warning
@@ -839,7 +850,7 @@ spec:
           for {{$labels.verb}} {{$labels.resource}}.
         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
       expr: |
-        cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
+        cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
       for: 10m
       labels:
         severity: critical
-- 
GitLab