From 621a9054d654e2875136bcbc9623a3b8bf89a9ee Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak <sergiusz.urbaniak@gmail.com> Date: Tue, 27 Aug 2019 13:27:18 +0200 Subject: [PATCH] jsonnet: bump kubernetes mixin --- jsonnetfile.lock.json | 12 +- manifests/grafana-dashboardDefinitions.yaml | 2787 ++----------------- manifests/grafana-deployment.yaml | 12 - manifests/prometheus-rules.yaml | 191 +- 4 files changed, 258 insertions(+), 2744 deletions(-) diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 21e5331c..841acd8f 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -27,7 +27,7 @@ "subdir": "" } }, - "version": "0afc72e70df6048c6b65fd3e4968e53b0812b30c" + "version": "24ea0d6e33a415e07ec7b675d74dea3cf01fde73" }, { "name": "grafonnet", @@ -98,6 +98,16 @@ } }, "version": "154d59dee72b894f7245d8d78c9344d1211d521f" + }, + { + "name": "promgrafonnet", + "source": { + "git": { + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", + "subdir": "lib/promgrafonnet" + } + }, + "version": "24ea0d6e33a415e07ec7b675d74dea3cf01fde73" } ] } diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index ebcd9c73..e05fd26d 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -2406,7 +2406,7 @@ items: namespace: monitoring - apiVersion: v1 data: - k8s-cluster-rsrc-use.json: |- + k8s-resources-cluster.json: |- { "annotations": { "list": [ @@ -2424,7 +2424,7 @@ items: "rows": [ { "collapse": false, - "height": "250px", + "height": "100px", "panels": [ { "aliasColors": { @@ -2434,7 +2434,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, + "format": "percentunit", "id": 1, "legend": { "avg": false, @@ -2446,7 +2447,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -2459,22 +2460,19 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:cluster_cpu_utilisation:ratio{cluster=\"$cluster\"}", + "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, "title": "CPU Utilisation", @@ -2483,7 +2481,7 @@ items: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -2495,10 +2493,10 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -2520,7 +2518,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, + "format": "percentunit", "id": 2, "legend": { "avg": false, @@ -2532,7 +2531,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -2545,31 +2544,28 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\"} / scalar(sum(min(kube_pod_info{cluster=\"$cluster\"}) by (node)))", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Saturation (Load1)", + "title": "CPU Requests Commitment", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -2581,10 +2577,10 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -2597,19 +2593,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -2618,7 +2602,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, + "format": "percentunit", "id": 3, "legend": { "avg": false, @@ -2630,7 +2615,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -2643,31 +2628,28 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:cluster_memory_utilisation:ratio{cluster=\"$cluster\"}", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "Memory Utilisation", + "title": "CPU Limits Commitment", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -2679,10 +2661,10 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -2704,7 +2686,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, + "format": "percentunit", "id": 4, "legend": { "avg": false, @@ -2716,7 +2699,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -2729,31 +2712,28 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\"}", + "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "Memory Saturation (Swap I/O)", + "title": "Memory Utilisation", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -2765,7 +2745,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -2781,19 +2761,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -2802,7 +2770,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, + "format": "percentunit", "id": 5, "legend": { "avg": false, @@ -2814,7 +2783,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -2827,31 +2796,28 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "Disk IO Utilisation", + "title": "Memory Requests Commitment", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -2863,10 +2829,10 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -2888,7 +2854,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, + "format": "percentunit", "id": 6, "legend": { "avg": false, @@ -2900,7 +2867,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -2913,31 +2880,28 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "Disk IO Saturation", + "title": "Memory Limits Commitment", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -2949,10 +2913,10 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -2970,8 +2934,8 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Disk", + "showTitle": false, + "title": "Headlines", "titleSize": "h6" }, { @@ -3011,16 +2975,16 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\"}", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "legendFormat": "{{namespace}}", + "legendLink": null, "step": 10 } ], @@ -3029,7 +2993,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Utilisation (Transmitted)", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -3047,7 +3011,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -3063,7 +3027,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -3072,7 +3048,7 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, "id": 8, "legend": { "avg": false, @@ -3084,7 +3060,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -3097,1923 +3073,34 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 - } - ], - "thresholds": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Pods", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Net Saturation (Dropped)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"} - node_filesystem_avail_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\"}\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Capacity", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Storage", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_node_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / USE Method / Cluster", - "uid": "a6e7d1362e1ddbb79db21d5bb40d7137", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-cluster-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - k8s-node-rsrc-use.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_cpu_utilisation:avg1m{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Utilisation", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Saturation", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Saturation (Load1)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_memory_utilisation:{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Memory", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Swap IO", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Saturation (Swap I/O)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Utilisation", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk IO Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Saturation", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk IO Saturation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Utilisation", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Net Utilisation (Transmitted)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Saturation", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Net Saturation (Dropped)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Net", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\", node=\"$node\"}\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_node_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "node", - "multi": false, - "name": "node", - "options": [ - - ], - "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / USE Method / Node", - "uid": "4ac4f123aae0ff6dbaf4f4f66120033b", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-node-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-cluster.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "CPU Requests Commitment", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "CPU Limits Commitment", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Requests Commitment", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Limits Commitment", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, + ], + "type": "number", + "unit": "short" + }, { "alias": "Workloads", "colorMode": null, @@ -6205,7 +4292,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -6488,7 +4575,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6506,7 +4593,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6524,7 +4611,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7451,7 +5538,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7469,7 +5556,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7487,7 +5574,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8142,7 +6229,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -8371,7 +6458,7 @@ items: ], "targets": [ { - "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -8389,7 +6476,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -8407,7 +6494,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -9107,7 +7194,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", @@ -9381,7 +7468,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -9399,7 +7486,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -9417,7 +7504,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -14040,36 +12127,16 @@ items: ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(node_load1{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "load 1m", + "interval": "1m", + "intervalFactor": 5, + "legendFormat": "{{cpu}}", "refId": "A" - }, - { - "expr": "max(node_load5{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "load 5m", - "refId": "B" - }, - { - "expr": "max(node_load15{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "load 15m", - "refId": "C" - }, - { - "expr": "count(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", mode=\"user\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "logical cores", - "refId": "D" } ], "thresholds": [ @@ -14077,7 +12144,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "System load", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -14095,19 +12162,19 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, + "max": 1, + "min": 0, "show": true }, { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, + "max": 1, + "min": 0, "show": true } ] @@ -14120,7 +12187,7 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { }, @@ -14156,115 +12223,32 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m]))", + "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cpu}}", + "legendFormat": "1m load average", "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Usage Per Core", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ + }, { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5m load average", + "refId": "B" }, { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 4, - "legend": { - "alignAsTable": "true", - "avg": "true", - "current": "true", - "max": "false", - "min": "false", - "rightSide": "true", - "show": "true", - "total": "false", - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ + "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "15m load average", + "refId": "C" + }, { - "expr": "max (sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m])) ) * 100\n", + "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", "format": "time_series", - "intervalFactor": 10, - "legendFormat": "{{ cpu }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "logical cores", + "refId": "D" } ], "thresholds": [ @@ -14272,7 +12256,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilization", + "title": "Load Average", "tooltip": { "shared": false, "sort": 0, @@ -14290,106 +12274,22 @@ items: }, "yaxes": [ { - "format": "percent", + "format": "short", "label": null, "logBase": 1, - "max": 100, + "max": null, "min": 0, "show": true }, { - "format": "percent", + "format": "short", "label": null, "logBase": 1, - "max": 100, + "max": null, "min": 0, "show": true } ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 5, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "CPU Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" } ], "repeat": null, @@ -14416,7 +12316,7 @@ items: "gridPos": { }, - "id": 6, + "id": 4, "legend": { "alignAsTable": false, "avg": false, @@ -14444,32 +12344,32 @@ items: ], "spaceLength": 10, "span": 9, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory used", "refId": "A" }, { - "expr": "max(node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory buffers", "refId": "B" }, { - "expr": "max(node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory cached", "refId": "C" }, { - "expr": "max(node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory free", @@ -14503,7 +12403,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -14511,7 +12411,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -14537,7 +12437,7 @@ items: "gridPos": { }, - "id": 7, + "id": 5, "interval": null, "links": [ @@ -14577,7 +12477,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "max(\n (\n (\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", + "expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -14621,11 +12521,11 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { }, - "id": 8, + "id": 6, "legend": { "alignAsTable": false, "avg": false, @@ -14650,11 +12550,11 @@ items: "repeat": null, "seriesOverrides": [ { - "alias": "read", + "alias": "/ read| written/", "yaxis": 1 }, { - "alias": "io time", + "alias": "/ io time/", "yaxis": 2 } ], @@ -14664,24 +12564,27 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(rate(node_disk_read_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", "format": "time_series", + "interval": "1m", "intervalFactor": 2, - "legendFormat": "read", + "legendFormat": "{{device}} read", "refId": "A" }, { - "expr": "max(rate(node_disk_written_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", "format": "time_series", + "interval": "1m", "intervalFactor": 2, - "legendFormat": "written", + "legendFormat": "{{device}} written", "refId": "B" }, { - "expr": "max(rate(node_disk_io_time_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", "format": "time_series", + "interval": "1m", "intervalFactor": 2, - "legendFormat": "io time", + "legendFormat": "{{device}} io time", "refId": "C" } ], @@ -14737,7 +12640,7 @@ items: "gridPos": { }, - "id": 9, + "id": 7, "legend": { "alignAsTable": false, "avg": false, @@ -14761,25 +12664,32 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - + { + "alias": "used", + "color": "#E0B400" + }, + { + "alias": "available", + "color": "#73BF69" + } ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "node:node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}", + "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", }\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", }\n )\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{device}} disk used", + "legendFormat": "used", "refId": "A" }, { - "expr": "node:node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}", + "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", }\n )\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{device}} disk free", + "legendFormat": "available", "refId": "B" } ], @@ -14806,19 +12716,19 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -14844,11 +12754,11 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { }, - "id": 10, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -14876,12 +12786,13 @@ items: ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(node_network_receive_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m])", + "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", "format": "time_series", + "interval": "1m", "intervalFactor": 2, "legendFormat": "{{device}}", "refId": "A" @@ -14914,7 +12825,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -14922,7 +12833,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -14935,11 +12846,11 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { }, - "id": 11, + "id": 9, "legend": { "alignAsTable": false, "avg": false, @@ -14967,12 +12878,13 @@ items: ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(node_network_transmit_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m])", + "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", "format": "time_series", + "interval": "1m", "intervalFactor": 2, "legendFormat": "{{device}}", "refId": "A" @@ -15005,7 +12917,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -15013,205 +12925,10 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 12, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "inodes used", - "refId": "A" - }, - { - "expr": "max(node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "inodes free", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Inodes Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, + "min": 0, "show": true } ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 13, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(\n (\n (\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Inodes Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" } ], "repeat": null, @@ -15226,7 +12943,7 @@ items: "schemaVersion": 14, "style": "dark", "tags": [ - "kubernetes-mixin" + ], "templating": { "list": [ @@ -15250,32 +12967,6 @@ items: "allValue": null, "current": { - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - }, "datasource": "$datasource", "hide": 0, @@ -15286,7 +12977,7 @@ items: "options": [ ], - "query": "label_values(node_boot_time_seconds{cluster=\"$cluster\", job=\"node-exporter\"}, instance)", + "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)", "refresh": 2, "regex": "", "sort": 0, @@ -15330,7 +13021,7 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Nodes", + "title": "Nodes", "uid": "fa49a4706d07a042595b664c87fb33ea", "version": 0 } @@ -16002,7 +13693,7 @@ items: "refId": "C" }, { - "expr": "sum by(container) (container_memory_cache{job=\"kubelet\", namespace=\"$namespace\", pod=~\"$pod\", container=~\"$container\", container!=\"POD\"})", + "expr": "sum by(container) (container_memory_cache{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", container=~\"$container\", container!=\"POD\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Cache: {{ container }}", @@ -21794,7 +19485,7 @@ items: "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\"}, namespace)", + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\"}, cluster=\"$cluster\", namespace)", "refresh": 2, "regex": "", "sort": 0, @@ -21820,7 +19511,7 @@ items: "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", namespace=\"$namespace\"}, statefulset)", + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)", "refresh": 2, "regex": "", "sort": 0, diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index 9fd37eb2..4fe23fa9 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -48,12 +48,6 @@ spec: - mountPath: /grafana-dashboard-definitions/0/controller-manager name: grafana-dashboard-controller-manager readOnly: false - - mountPath: /grafana-dashboard-definitions/0/k8s-cluster-rsrc-use - name: grafana-dashboard-k8s-cluster-rsrc-use - readOnly: false - - mountPath: /grafana-dashboard-definitions/0/k8s-node-rsrc-use - name: grafana-dashboard-k8s-node-rsrc-use - readOnly: false - mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster name: grafana-dashboard-k8s-resources-cluster readOnly: false @@ -123,12 +117,6 @@ spec: - configMap: name: grafana-dashboard-controller-manager name: grafana-dashboard-controller-manager - - configMap: - name: grafana-dashboard-k8s-cluster-rsrc-use - name: grafana-dashboard-k8s-cluster-rsrc-use - - configMap: - name: grafana-dashboard-k8s-node-rsrc-use - name: grafana-dashboard-k8s-node-rsrc-use - configMap: name: grafana-dashboard-k8s-resources-cluster name: grafana-dashboard-k8s-resources-cluster diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index eeee6375..65db451f 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -82,13 +82,6 @@ spec: - expr: | sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace) record: namespace:container_memory_usage_bytes:sum - - expr: | - sum by (namespace, label_name) ( - sum(container_memory_usage_bytes{job="kubelet",image!="", container!="POD"}) by (pod, namespace) - * on (namespace, pod) - group_left(label_name) kube_pod_labels{job="kube-state-metrics"} - ) - record: namespace:container_memory_usage_bytes:sum - expr: | sum by (namespace, label_name) ( sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod) @@ -214,177 +207,9 @@ spec: node_namespace_pod:kube_pod_info: )) record: node:node_num_cpu:sum - - expr: | - 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])) - record: :node_cpu_utilisation:avg1m - - expr: | - 1 - avg by (node) ( - rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info:) - record: node:node_cpu_utilisation:avg1m - - expr: | - node:node_cpu_utilisation:avg1m - * - node:node_num_cpu:sum - / - scalar(sum(node:node_num_cpu:sum)) - record: node:cluster_cpu_utilisation:ratio - - expr: | - sum(node_load1{job="node-exporter"}) - / - sum(node:node_num_cpu:sum) - record: ':node_cpu_saturation_load1:' - - expr: | - sum by (node) ( - node_load1{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - / - node:node_num_cpu:sum - record: 'node:node_cpu_saturation_load1:' - - expr: | - 1 - - sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) - / - sum(node_memory_MemTotal_bytes{job="node-exporter"}) - record: ':node_memory_utilisation:' - expr: | sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) record: :node_memory_MemFreeCachedBuffers_bytes:sum - - expr: | - sum(node_memory_MemTotal_bytes{job="node-exporter"}) - record: :node_memory_MemTotal_bytes:sum - - expr: | - sum by (node) ( - (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_bytes_available:sum - - expr: | - sum by (node) ( - node_memory_MemTotal_bytes{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_bytes_total:sum - - expr: | - (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) - / - node:node_memory_bytes_total:sum - record: node:node_memory_utilisation:ratio - - expr: | - (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) - / - scalar(sum(node:node_memory_bytes_total:sum)) - record: node:cluster_memory_utilisation:ratio - - expr: | - 1e3 * sum( - (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) - ) - record: :node_memory_swap_io_bytes:sum_rate - - expr: | - 1 - - sum by (node) ( - (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - / - sum by (node) ( - node_memory_MemTotal_bytes{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: 'node:node_memory_utilisation:' - - expr: | - 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum) - record: 'node:node_memory_utilisation_2:' - - expr: | - 1e3 * sum by (node) ( - (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_swap_io_bytes:sum_rate - - expr: | - avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) - record: :node_disk_utilisation:avg_irate - - expr: | - avg by (node) ( - irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_disk_utilisation:avg_irate - - expr: | - avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) - record: :node_disk_saturation:avg_irate - - expr: | - avg by (node) ( - irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_disk_saturation:avg_irate - - expr: | - max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} - - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - record: 'node:node_filesystem_usage:' - - expr: | - max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - record: 'node:node_filesystem_avail:' - - expr: | - sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + - sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) - record: :node_net_utilisation:sum_irate - - expr: | - sum by (node) ( - (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) + - irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_net_utilisation:sum_irate - - expr: | - sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + - sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) - record: :node_net_saturation:sum_irate - - expr: | - sum by (node) ( - (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) + - irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_net_saturation:sum_irate - - expr: | - max( - max( - kube_pod_info{job="kube-state-metrics", host_ip!=""} - ) by (node, host_ip) - * on (host_ip) group_right (node) - label_replace( - (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" - ) - ) by (node) - record: 'node:node_inodes_total:' - - expr: | - max( - max( - kube_pod_info{job="kube-state-metrics", host_ip!=""} - ) by (node, host_ip) - * on (host_ip) group_right (node) - label_replace( - (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" - ) - ) by (node) - record: 'node:node_inodes_free:' - name: kube-prometheus-node-recording.rules rules: - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY @@ -663,17 +488,17 @@ spec: runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping expr: | rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0 - for: 1h + for: 15m labels: severity: critical - alert: KubePodNotReady annotations: message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready - state for longer than an hour. + state for longer than 15 minutes. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready expr: | sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Failed|Pending|Unknown"}) > 0 - for: 1h + for: 15m labels: severity: critical - alert: KubeDeploymentGenerationMismatch @@ -692,13 +517,13 @@ spec: - alert: KubeDeploymentReplicasMismatch annotations: message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not - matched the expected number of replicas for longer than an hour. + matched the expected number of replicas for longer than 15 minutes. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch expr: | kube_deployment_spec_replicas{job="kube-state-metrics"} != kube_deployment_status_replicas_available{job="kube-state-metrics"} - for: 1h + for: 15m labels: severity: critical - alert: KubeStatefulSetReplicasMismatch @@ -806,7 +631,7 @@ spec: runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed expr: | kube_job_status_failed{job="kube-state-metrics"} > 0 - for: 1h + for: 15m labels: severity: warning - name: kubernetes-resources @@ -940,7 +765,7 @@ spec: runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready expr: | kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 - for: 1h + for: 15m labels: severity: warning - alert: KubeVersionMismatch @@ -950,7 +775,7 @@ spec: runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch expr: | count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1 - for: 1h + for: 15m labels: severity: warning - alert: KubeClientErrors -- GitLab