From 888443e447cc0dd6b131da21e0103e6ade29e842 Mon Sep 17 00:00:00 2001 From: paulfantom <pawel@krupa.net.pl> Date: Tue, 25 May 2021 16:03:49 +0200 Subject: [PATCH] manifests: regenerate --- manifests/grafana-dashboardDefinitions.yaml | 24498 ++++++++-------- manifests/grafana-deployment.yaml | 6 - .../kube-state-metrics-prometheusRule.yaml | 23 + manifests/kubernetes-prometheusRule.yaml | 236 +- manifests/prometheus-prometheusRule.yaml | 10 + .../setup/prometheus-operator-deployment.yaml | 2 + 6 files changed, 12776 insertions(+), 11999 deletions(-) diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 2ab0f97b..267433db 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -7199,124 +7199,10 @@ items: "showTitle": true, "title": "Rate of Packets Dropped", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(node_cpu_seconds_total, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Cluster", - "uid": "efa86fd1d0c121a26444b636a3f509a8", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-k8s-resources-cluster - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-namespace.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ + }, { "collapse": false, - "height": "100px", + "height": "250px", "panels": [ { "aliasColors": { @@ -7326,9 +7212,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 1, + "decimals": -1, + "fill": 10, + "id": 20, "legend": { "avg": false, "current": false, @@ -7339,7 +7225,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -7352,28 +7238,31 @@ items: ], "spaceLength": 10, - "span": 3, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m])))", "format": "time_series", - "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation (from requests)", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -7410,9 +7299,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 2, + "fill": 10, + "id": 21, "legend": { "avg": false, "current": false, @@ -7423,7 +7311,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -7436,28 +7324,31 @@ items: ], "spaceLength": 10, - "span": 3, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "format": "time_series", - "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation (from limits)", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -7469,7 +7360,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -7485,7 +7376,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -7495,8 +7398,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 3, + "id": 22, "legend": { "avg": false, "current": false, @@ -7519,113 +7421,239 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, - "span": 3, + "span": 12, "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilization (from requests)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { + "alias": "IOPS(Writes)", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + ], + "type": "string", + "unit": "short" + } ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", - "format": "time_series", + "expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Memory Utilisation (from limits)", + "title": "Current Storage IO", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -7658,109 +7686,184 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Headlines", + "showTitle": true, + "title": "Storage IO - Distribution", "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(node_cpu_seconds_total, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-k8s-resources-cluster + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "quota - requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hiddenSeries": true, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - }, - { - "alias": "quota - limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hiddenSeries": true, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - } ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "quota - limits", - "legendLink": null, - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU Utilisation (from requests)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -7788,19 +7891,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -7810,7 +7901,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 6, + "format": "percentunit", + "id": 2, "legend": { "avg": false, "current": false, @@ -7834,206 +7926,112 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 3, "stack": false, "steppedLine": false, - "styles": [ + "targets": [ { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation (from limits)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", - "format": "table", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "format": "time_series", "instant": true, "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Memory Utilisation (from requests)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -8061,19 +8059,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Quota", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -8082,8 +8068,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "format": "percentunit", + "id": 4, "legend": { "avg": false, "current": false, @@ -8094,7 +8081,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -8104,36 +8091,131 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "quota - requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hiddenSeries": true, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - }, - { - "alias": "quota - limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hiddenSeries": true, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation (from limits)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -8141,7 +8223,7 @@ items: "step": 10 }, { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", @@ -8149,7 +8231,7 @@ items: "step": 10 }, { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", @@ -8162,7 +8244,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -8180,7 +8262,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -8202,7 +8284,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "CPU Usage", "titleSize": "h6" }, { @@ -8218,7 +8300,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 8, + "id": 6, "legend": { "avg": false, "current": false, @@ -8253,7 +8335,7 @@ items: "type": "hidden" }, { - "alias": "Memory Usage", + "alias": "CPU Usage", "colorMode": null, "colors": [ @@ -8269,10 +8351,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests", + "alias": "CPU Requests", "colorMode": null, "colors": [ @@ -8288,10 +8370,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests %", + "alias": "CPU Requests %", "colorMode": null, "colors": [ @@ -8310,7 +8392,7 @@ items: "unit": "percentunit" }, { - "alias": "Memory Limits", + "alias": "CPU Limits", "colorMode": null, "colors": [ @@ -8326,10 +8408,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Limits %", + "alias": "CPU Limits %", "colorMode": null, "colors": [ @@ -8347,63 +8429,6 @@ items: "type": "number", "unit": "percentunit" }, - { - "alias": "Memory Usage (RSS)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Swap)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, { "alias": "Pod", "colorMode": null, @@ -8441,7 +8466,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8450,7 +8475,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8459,7 +8484,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8468,7 +8493,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8477,40 +8502,13 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - }, - { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 } ], "thresholds": [ @@ -8518,7 +8516,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, @@ -8559,7 +8557,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "CPU Quota", "titleSize": "h6" }, { @@ -8574,9 +8572,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 9, - "interval": "1m", + "fill": 10, + "id": 7, "legend": { "avg": false, "current": false, @@ -8587,7 +8584,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -8597,9 +8594,144 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - - ], - "spaceLength": 10, + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, "span": 12, "stack": false, "steppedLine": false, @@ -8611,7 +8743,7 @@ items: "type": "hidden" }, { - "alias": "Current Receive Bandwidth", + "alias": "Memory Usage", "colorMode": null, "colors": [ @@ -8627,10 +8759,10 @@ items: ], "type": "number", - "unit": "Bps" + "unit": "bytes" }, { - "alias": "Current Transmit Bandwidth", + "alias": "Memory Requests", "colorMode": null, "colors": [ @@ -8646,10 +8778,10 @@ items: ], "type": "number", - "unit": "Bps" + "unit": "bytes" }, { - "alias": "Rate of Received Packets", + "alias": "Memory Requests %", "colorMode": null, "colors": [ @@ -8665,10 +8797,10 @@ items: ], "type": "number", - "unit": "pps" + "unit": "percentunit" }, { - "alias": "Rate of Transmitted Packets", + "alias": "Memory Limits", "colorMode": null, "colors": [ @@ -8684,10 +8816,10 @@ items: ], "type": "number", - "unit": "pps" + "unit": "bytes" }, { - "alias": "Rate of Received Packets Dropped", + "alias": "Memory Limits %", "colorMode": null, "colors": [ @@ -8703,10 +8835,10 @@ items: ], "type": "number", - "unit": "pps" + "unit": "percentunit" }, { - "alias": "Rate of Transmitted Packets Dropped", + "alias": "Memory Usage (RSS)", "colorMode": null, "colors": [ @@ -8722,7 +8854,45 @@ items: ], "type": "number", - "unit": "pps" + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" }, { "alias": "Pod", @@ -8734,7 +8904,7 @@ items: "decimals": 2, "link": true, "linkTargetBlank": false, - "linkTooltip": "Drill down to pods", + "linkTooltip": "Drill down", "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ @@ -8761,7 +8931,7 @@ items: ], "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8770,7 +8940,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8779,7 +8949,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8788,7 +8958,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8797,7 +8967,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8806,13 +8976,31 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ @@ -8820,7 +9008,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Current Network Usage", + "title": "Memory Quota", "tooltip": { "shared": false, "sort": 0, @@ -8861,7 +9049,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Current Network Usage", + "title": "Memory Quota", "titleSize": "h6" }, { @@ -8876,8 +9064,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 10, + "fill": 1, + "id": 9, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -8888,7 +9077,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -8901,31 +9090,234 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -8937,7 +9329,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -8953,7 +9345,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Network Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -8963,7 +9367,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 11, + "id": 10, "legend": { "avg": false, "current": false, @@ -8992,7 +9396,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9005,7 +9409,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -9039,19 +9443,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Bandwidth", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -9061,7 +9453,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 12, + "id": 11, "legend": { "avg": false, "current": false, @@ -9090,7 +9482,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9103,7 +9495,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -9137,7 +9529,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -9147,7 +9551,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 13, + "id": 12, "legend": { "avg": false, "current": false, @@ -9176,7 +9580,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9189,7 +9593,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 0, @@ -9223,19 +9627,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rate of Packets", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -9245,7 +9637,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 14, + "id": 13, "legend": { "avg": false, "current": false, @@ -9274,7 +9666,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9287,7 +9679,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 0, @@ -9321,7 +9713,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -9331,7 +9735,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 15, + "id": 14, "legend": { "avg": false, "current": false, @@ -9360,7 +9764,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9373,7 +9777,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -9407,160 +9811,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rate of Packets Dropped", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Namespace (Pods)", - "uid": "85a562078cdf77779eaa1add43ccec1e", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-k8s-resources-namespace - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-node.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ { "aliasColors": { @@ -9570,7 +9821,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 1, + "id": 15, "legend": { "avg": false, "current": false, @@ -9594,12 +9845,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9612,7 +9863,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -9630,7 +9881,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -9652,7 +9903,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Rate of Packets Dropped", "titleSize": "h6" }, { @@ -9667,8 +9918,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 2, + "decimals": -1, + "fill": 10, + "id": 16, "legend": { "avg": false, "current": false, @@ -9679,7 +9931,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -9692,206 +9944,31 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - } - ], - "thresholds": [ + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -9919,19 +9996,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Quota", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -9941,7 +10006,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 17, "legend": { "avg": false, "current": false, @@ -9965,12 +10030,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9983,7 +10048,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 0, @@ -10001,7 +10066,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -10023,7 +10088,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Storage IO", "titleSize": "h6" }, { @@ -10039,7 +10104,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 18, "legend": { "avg": false, "current": false, @@ -10062,6 +10127,10 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, "span": 12, "stack": false, @@ -10074,13 +10143,13 @@ items: "type": "hidden" }, { - "alias": "Memory Usage", + "alias": "IOPS(Reads)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", @@ -10090,16 +10159,16 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests", + "alias": "IOPS(Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", @@ -10109,16 +10178,16 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests %", + "alias": "IOPS(Reads + Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, "linkTargetBlank": false, "linkTooltip": "Drill down", @@ -10128,10 +10197,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "short" }, { - "alias": "Memory Limits", + "alias": "Throughput(Read)", "colorMode": null, "colors": [ @@ -10147,10 +10216,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "Bps" }, { - "alias": "Memory Limits %", + "alias": "Throughput(Write)", "colorMode": null, "colors": [ @@ -10166,10 +10235,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "Bps" }, { - "alias": "Memory Usage (RSS)", + "alias": "Throughput(Read + Write)", "colorMode": null, "colors": [ @@ -10185,84 +10254,46 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "Bps" }, { - "alias": "Memory Usage (Cache)", + "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": false, + "link": true, "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Usage (Swap)", + "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", + "pattern": "/.*/", "thresholds": [ ], - "type": "number", - "unit": "bytes" - }, + "type": "string", + "unit": "short" + } + ], + "targets": [ { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -10271,7 +10302,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -10280,7 +10311,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -10289,7 +10320,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -10298,7 +10329,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -10307,31 +10338,13 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 } ], "thresholds": [ @@ -10339,7 +10352,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Current Storage IO", "tooltip": { "shared": false, "sort": 0, @@ -10380,7 +10393,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Storage IO - Distribution", "titleSize": "h6" } ], @@ -10444,12 +10457,12 @@ items: "hide": 0, "includeAll": false, "label": null, - "multi": true, - "name": "node", + "multi": false, + "name": "namespace", "options": [ ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "sort": 1, @@ -10493,8 +10506,8 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Node (Pods)", - "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e", "version": 0 } kind: ConfigMap @@ -10504,11 +10517,11 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-k8s-resources-node + name: grafana-dashboard-k8s-resources-namespace namespace: monitoring - apiVersion: v1 data: - k8s-resources-pod.json: |- + k8s-resources-node.json: |- { "annotations": { "list": [ @@ -10558,24 +10571,7 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "requests", - "color": "#F2495C", - "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - }, - { - "alias": "limits", - "color": "#FF9830", - "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, "span": 12, @@ -10583,26 +10579,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "requests", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -10662,118 +10642,13 @@ items: { "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 2, - "legend": { - "avg": false, - "current": true, - "max": true, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.25, - "yaxis": "left" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Throttling", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Throttling", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 3, + "id": 2, "legend": { "avg": false, "current": false, @@ -10903,7 +10778,7 @@ items: "unit": "percentunit" }, { - "alias": "Container", + "alias": "Pod", "colorMode": null, "colors": [ @@ -10914,7 +10789,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "container", + "pattern": "pod", "thresholds": [ ], @@ -10939,7 +10814,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10948,7 +10823,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10957,7 +10832,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10966,7 +10841,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10975,7 +10850,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11046,7 +10921,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 4, + "id": 3, "legend": { "avg": false, "current": false, @@ -11067,26 +10942,7 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - }, - { - "alias": "limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, "span": 12, @@ -11094,26 +10950,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "requests", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -11123,7 +10963,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 0, @@ -11179,7 +11019,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 5, + "id": 4, "legend": { "avg": false, "current": false, @@ -11366,7 +11206,7 @@ items: "unit": "bytes" }, { - "alias": "Container", + "alias": "Pod", "colorMode": null, "colors": [ @@ -11377,7 +11217,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "container", + "pattern": "pod", "thresholds": [ ], @@ -11402,7 +11242,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11411,7 +11251,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11420,7 +11260,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11429,7 +11269,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11438,7 +11278,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11447,7 +11287,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11456,7 +11296,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11465,7 +11305,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -11522,7 +11362,148 @@ items: "showTitle": true, "title": "Memory Quota", "titleSize": "h6" - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": true, + "name": "node", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Node (Pods)", + "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-k8s-resources-node + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-pod.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ { "collapse": false, "height": "250px", @@ -11536,8 +11517,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 6, - "interval": "1m", + "id": 1, "legend": { "avg": false, "current": false, @@ -11558,18 +11538,51 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - + { + "alias": "requests", + "color": "#F2495C", + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "limits", + "color": "#FF9830", + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", "legendLink": null, "step": 10 } @@ -11579,7 +11592,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -11597,7 +11610,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -11613,7 +11626,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -11623,12 +11648,11 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 7, - "interval": "1m", + "id": 2, "legend": { "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, "show": true, "total": false, @@ -11648,25 +11672,32 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } ], "thresholds": [ - + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.25, + "yaxis": "left" + } ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "CPU Throttling", "tooltip": { "shared": false, "sort": 0, @@ -11684,10 +11715,10 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -11706,7 +11737,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Bandwidth", + "title": "CPU Throttling", "titleSize": "h6" }, { @@ -11721,9 +11752,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 8, - "interval": "1m", + "fill": 1, + "id": 3, "legend": { "avg": false, "current": false, @@ -11734,7 +11764,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -11747,16 +11777,190 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -11765,13 +11969,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -11783,7 +11988,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -11799,7 +12004,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -11809,8 +12026,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, - "interval": "1m", + "id": 4, "legend": { "avg": false, "current": false, @@ -11831,117 +12047,53 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "alias": "limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rate of Packets", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 10, - "interval": "1m", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", "legendLink": null, "step": 10 } @@ -11951,7 +12103,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "Memory Usage (WSS)", "tooltip": { "shared": false, "sort": 0, @@ -11969,7 +12121,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -11985,7 +12137,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -11994,9 +12158,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 11, - "interval": "1m", + "fill": 1, + "id": 5, "legend": { "avg": false, "current": false, @@ -12007,7 +12170,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -12020,441 +12183,153 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage (WSS)", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ - ] - }, - "yaxes": [ + ], + "type": "number", + "unit": "bytes" + }, { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rate of Packets Dropped", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "namespace", - "options": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "pod", - "options": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Pod", - "uid": "6581e46e4e5c7ba40a07646395ef7b23", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-k8s-resources-pod - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-workload.json: |- - { - "annotations": { - "list": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", @@ -12463,15 +12338,15 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #H", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Limits %", + "alias": "Container", "colorMode": null, "colors": [ @@ -12482,26 +12357,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", + "pattern": "container", "thresholds": [ ], @@ -12526,7 +12382,7 @@ items: ], "targets": [ { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12535,7 +12391,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12544,7 +12400,7 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12553,7 +12409,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12562,13 +12418,40 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ @@ -12576,7 +12459,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Memory Quota", "tooltip": { "shared": false, "sort": 0, @@ -12617,7 +12500,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Memory Quota", "titleSize": "h6" }, { @@ -12633,7 +12516,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 6, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -12657,12 +12541,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -12675,7 +12559,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -12693,7 +12577,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -12709,19 +12593,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -12730,8 +12602,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 4, + "fill": 10, + "id": 7, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -12742,7 +12615,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -12755,190 +12628,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -12947,14 +12646,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -12966,7 +12664,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -12988,7 +12686,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Bandwidth", "titleSize": "h6" }, { @@ -13003,8 +12701,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 5, + "fill": 10, + "id": 8, "interval": "1m", "legend": { "avg": false, @@ -13016,7 +12714,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -13029,234 +12727,31 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "Current Receive Bandwidth", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Current Transmit Bandwidth", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Rate of Received Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Received Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - } - ], - "thresholds": [ + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Current Network Usage", + "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -13268,7 +12763,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -13284,19 +12779,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Current Network Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -13306,7 +12789,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 6, + "id": 9, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13335,7 +12819,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -13348,7 +12832,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 0, @@ -13382,7 +12866,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -13392,7 +12888,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 7, + "id": 10, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13421,7 +12918,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -13434,7 +12931,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -13468,19 +12965,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Bandwidth", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -13490,7 +12975,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 8, + "id": 11, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13519,7 +13005,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -13532,7 +13018,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Container Bandwidth by Pod: Received", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -13566,7 +13052,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -13575,8 +13073,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "decimals": -1, "fill": 10, - "id": 9, + "id": 12, "legend": { "avg": false, "current": false, @@ -13605,10 +13104,18 @@ items: "steppedLine": false, "targets": [ { - "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "Reads", + "legendLink": null, + "step": 10 + }, + { + "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes", "legendLink": null, "step": 10 } @@ -13618,7 +13125,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Container Bandwidth by Pod: Transmitted", + "title": "IOPS", "tooltip": { "shared": false, "sort": 0, @@ -13636,7 +13143,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -13652,19 +13159,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Average Container Bandwidth by Pod", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -13674,7 +13169,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 10, + "id": 13, "legend": { "avg": false, "current": false, @@ -13703,10 +13198,18 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "Reads", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes", "legendLink": null, "step": 10 } @@ -13716,7 +13219,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "ThroughPut", "tooltip": { "shared": false, "sort": 0, @@ -13750,7 +13253,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution(Pod - Read & Writes)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -13759,8 +13274,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "decimals": -1, "fill": 10, - "id": 11, + "id": 14, "legend": { "avg": false, "current": false, @@ -13789,10 +13305,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "ceil(sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m])))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } @@ -13802,7 +13318,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 0, @@ -13820,7 +13336,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -13836,19 +13352,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rate of Packets", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -13858,7 +13362,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 12, + "id": 15, "legend": { "avg": false, "current": false, @@ -13887,10 +13391,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } @@ -13900,7 +13404,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 0, @@ -13934,7 +13438,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution(Containers)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -13943,8 +13459,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 13, + "fill": 1, + "id": 16, "legend": { "avg": false, "current": false, @@ -13955,7 +13471,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -13967,118 +13483,298 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, - "span": 6, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + ], + "type": "number", + "unit": "short" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rate of Packets Dropped", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + "alias": "IOPS(Writes)", + "colorMode": null, + "colors": [ - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Storage IO", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" }, { "allValue": null, @@ -14087,15 +13783,15 @@ items: "value": "" }, "datasource": "$datasource", - "hide": 0, + "hide": 2, "includeAll": false, "label": null, "multi": false, - "name": "namespace", + "name": "cluster", "options": [ ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "query": "label_values(kube_pod_info, cluster)", "refresh": 2, "regex": "", "sort": 1, @@ -14118,11 +13814,11 @@ items: "includeAll": false, "label": null, "multi": false, - "name": "workload", + "name": "namespace", "options": [ ], - "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "sort": 1, @@ -14145,11 +13841,11 @@ items: "includeAll": false, "label": null, "multi": false, - "name": "type", + "name": "pod", "options": [ ], - "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", "refresh": 2, "regex": "", "sort": 1, @@ -14193,8 +13889,8 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Workload", - "uid": "a164a7f0339f99e89cea5cb47e9be617", + "title": "Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", "version": 0 } kind: ConfigMap @@ -14204,11 +13900,11 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-k8s-resources-workload + name: grafana-dashboard-k8s-resources-pod namespace: monitoring - apiVersion: v1 data: - k8s-resources-workloads-namespace.json: |- + k8s-resources-workload.json: |- { "annotations": { "list": [ @@ -14258,55 +13954,18 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ { - "alias": "quota - requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hiddenSeries": true, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - }, - { - "alias": "quota - limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hiddenSeries": true, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "quota - limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -14406,25 +14065,6 @@ items: "pattern": "Time", "type": "hidden" }, - { - "alias": "Running Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, { "alias": "CPU Usage", "colorMode": null, @@ -14437,7 +14077,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #B", + "pattern": "Value #A", "thresholds": [ ], @@ -14456,7 +14096,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #B", "thresholds": [ ], @@ -14475,7 +14115,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #C", "thresholds": [ ], @@ -14494,7 +14134,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #D", "thresholds": [ ], @@ -14513,7 +14153,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #F", + "pattern": "Value #E", "thresholds": [ ], @@ -14521,7 +14161,7 @@ items: "unit": "percentunit" }, { - "alias": "Workload", + "alias": "Pod", "colorMode": null, "colors": [ @@ -14531,27 +14171,8 @@ items: "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workload Type", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -14576,7 +14197,7 @@ items: ], "targets": [ { - "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -14585,7 +14206,7 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -14594,7 +14215,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -14603,7 +14224,7 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -14612,22 +14233,13 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - }, - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 } ], "thresholds": [ @@ -14713,28 +14325,7 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "quota - requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hiddenSeries": true, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - }, - { - "alias": "quota - limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hiddenSeries": true, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, "span": 12, @@ -14742,26 +14333,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "quota - limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -14861,25 +14436,6 @@ items: "pattern": "Time", "type": "hidden" }, - { - "alias": "Running Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, { "alias": "Memory Usage", "colorMode": null, @@ -14892,7 +14448,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #B", + "pattern": "Value #A", "thresholds": [ ], @@ -14911,7 +14467,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #B", "thresholds": [ ], @@ -14930,7 +14486,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #C", "thresholds": [ ], @@ -14949,7 +14505,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #D", "thresholds": [ ], @@ -14968,7 +14524,7 @@ items: "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #F", + "pattern": "Value #E", "thresholds": [ ], @@ -14976,7 +14532,7 @@ items: "unit": "percentunit" }, { - "alias": "Workload", + "alias": "Pod", "colorMode": null, "colors": [ @@ -14986,27 +14542,8 @@ items: "link": true, "linkTargetBlank": false, "linkTooltip": "Drill down", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workload Type", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -15031,7 +14568,7 @@ items: ], "targets": [ { - "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15040,7 +14577,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15049,7 +14586,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15058,7 +14595,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15067,22 +14604,13 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 } ], "thresholds": [ @@ -15297,7 +14825,7 @@ items: "unit": "pps" }, { - "alias": "Workload", + "alias": "Pod", "colorMode": null, "colors": [ @@ -15306,9 +14834,9 @@ items: "decimals": 2, "link": true, "linkTargetBlank": false, - "linkTooltip": "Drill down to pods", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type", - "pattern": "workload", + "linkTooltip": "Drill down", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -15316,43 +14844,24 @@ items: "unit": "short" }, { - "alias": "Workload Type", + "alias": "", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", + "pattern": "/.*/", "thresholds": [ ], - "type": "number", + "type": "string", "unit": "short" - }, + } + ], + "targets": [ { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15361,7 +14870,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15370,7 +14879,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15379,7 +14888,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15388,7 +14897,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15397,7 +14906,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -15497,10 +15006,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -15583,10 +15092,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -15681,10 +15190,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -15694,7 +15203,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Container Bandwidth by Workload: Received", + "title": "Average Container Bandwidth by Pod: Received", "tooltip": { "shared": false, "sort": 0, @@ -15767,10 +15276,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -15780,7 +15289,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Container Bandwidth by Workload: Transmitted", + "title": "Average Container Bandwidth by Pod: Transmitted", "tooltip": { "shared": false, "sort": 0, @@ -15820,7 +15329,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Average Container Bandwidth by Workload", + "title": "Average Container Bandwidth by Pod", "titleSize": "h6" }, { @@ -15865,10 +15374,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -15951,10 +15460,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -16049,10 +15558,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -16135,10 +15644,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -16244,28 +15753,23 @@ items: }, { "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", "current": { - "text": "deployment", - "value": "deployment" + "text": "", + "value": "" }, "datasource": "$datasource", - "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", "hide": 0, "includeAll": false, "label": null, "multi": false, - "name": "type", + "name": "namespace", "options": [ ], - "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", - "skipUrlSync": false, - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -16285,11 +15789,38 @@ items: "includeAll": false, "label": null, "multi": false, - "name": "namespace", + "name": "workload", "options": [ ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", "refresh": 2, "regex": "", "sort": 1, @@ -16333,8 +15864,8 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Namespace (Workloads)", - "uid": "a87fb0d919ec0ea5f6543124e16c42a5", + "title": "Kubernetes / Compute Resources / Workload", + "uid": "a164a7f0339f99e89cea5cb47e9be617", "version": 0 } kind: ConfigMap @@ -16344,28 +15875,21 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-k8s-resources-workloads-namespace + name: grafana-dashboard-k8s-resources-workload namespace: monitoring - apiVersion: v1 data: - kubelet.json: |- + k8s-resources-workloads-namespace.json: |- { - "__inputs": [ - - ], - "__requires": [ - - ], "annotations": { "list": [ ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, - "id": null, "links": [ ], @@ -16373,524 +15897,462 @@ items: "rows": [ { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "aliasColors": { }, - "id": 2, - "interval": null, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ { - "name": "value to text", - "value": 1 + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false } ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 } ], - "thresholds": "", - "title": "Up", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "shared": false, + "sort": 0, + "value_type": "individual" }, - "gridPos": { + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] }, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "yaxes": [ { - "name": "value to text", - "value": 1 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Running Pods", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "aliasColors": { }, - "id": 4, - "interval": null, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ { - "name": "value to text", - "value": 1 + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Running Container", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ - }, - "id": 5, - "interval": null, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ - ], - "mappingType": 1, - "mappingTypes": [ + ], + "type": "number", + "unit": "percentunit" + }, { - "name": "value to text", - "value": 1 + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "short" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, { - "from": "null", - "text": "N/A", - "to": "null" + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" } ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", "targets": [ { - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", - "format": "time_series", + "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 } ], - "thresholds": "", - "title": "Actual Volume Count", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 6, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Desired Volume Count", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] }, - "id": 7, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "yaxes": [ { - "name": "value to text", - "value": 1 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Config Error Count", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "valueName": "min" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -16900,142 +16362,79 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 8, + "fill": 10, + "id": 3, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ - + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Operation Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 }, { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 9, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, { - "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -17043,7 +16442,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Operation Error Rate", + "title": "Memory Usage", "tooltip": { "shared": false, "sort": 0, @@ -17061,7 +16460,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -17069,12 +16468,12 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -17082,14 +16481,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -17100,34 +16498,26 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 10, + "id": 4, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], @@ -17135,126 +16525,235 @@ items: "span": 12, "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" - } - ], - "thresholds": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Operation duration 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ - ] - }, - "yaxes": [ + ], + "type": "number", + "unit": "short" + }, { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" }, { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ - }, - "id": 11, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + ], + "type": "string", + "unit": "short" + } ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", + "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{instance}} pod", - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 }, { - "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{instance}} worker", - "refId": "B" + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 } ], "thresholds": [ @@ -17262,13 +16761,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pod Start Rate", + "title": "Memory Quota", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -17280,7 +16780,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -17288,15 +16788,27 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -17306,69 +16818,278 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 12, + "id": 5, + "interval": "1m", "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} pod", - "refId": "A" + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} worker", - "refId": "B" - } - ], + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Pod Start Duration", + "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -17380,7 +17101,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -17388,12 +17109,12 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -17401,14 +17122,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Current Network Usage", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -17418,51 +17138,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 13, + "fill": 10, + "id": 6, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -17470,7 +17181,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Storage Operation Rate", + "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -17488,7 +17199,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -17496,12 +17207,12 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] }, @@ -17513,51 +17224,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 14, + "fill": 10, + "id": 7, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -17565,7 +17267,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Storage Operation Error Rate", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -17583,7 +17285,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -17591,12 +17293,12 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -17604,14 +17306,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -17621,51 +17322,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 15, + "fill": 10, + "id": 8, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", + "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -17673,7 +17365,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Storage Operation Duration 99th quantile", + "title": "Average Container Bandwidth by Workload: Received", "tooltip": { "shared": false, "sort": 0, @@ -17691,7 +17383,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -17699,28 +17391,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -17729,49 +17408,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 16, + "fill": 10, + "id": 9, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{operation_type}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -17779,7 +17451,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Cgroup manager operation rate", + "title": "Average Container Bandwidth by Workload: Transmitted", "tooltip": { "shared": false, "sort": 0, @@ -17797,7 +17469,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -17805,15 +17477,27 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Container Bandwidth by Workload", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -17822,49 +17506,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 17, + "fill": 10, + "id": 10, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -17872,7 +17549,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Cgroup manager 99th quantile", + "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 0, @@ -17890,7 +17567,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -17898,28 +17575,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -17928,143 +17592,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "Pod lifecycle event generator", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 18, + "fill": 10, + "id": 11, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "PLEG relist rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 19, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -18072,7 +17635,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist interval", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 0, @@ -18090,7 +17653,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -18098,12 +17661,12 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -18111,14 +17674,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -18128,49 +17690,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 20, + "fill": 10, + "id": 12, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -18178,7 +17733,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist duration", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -18196,7 +17751,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -18204,28 +17759,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -18234,70 +17776,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 21, + "fill": 10, + "id": 13, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -18305,7 +17819,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "RPC Rate", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -18323,7 +17837,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -18331,12 +17845,12 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -18344,1378 +17858,942 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "$datasource", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-k8s-resources-workloads-namespace + namespace: monitoring +- apiVersion: v1 + data: + kubelet.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { }, - "id": 22, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, + "id": 2, + "interval": null, "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ + "mappingType": 1, + "mappingTypes": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{verb}} {{url}}", - "refId": "A" + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Request duration 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "from": "null", + "text": "N/A", + "to": "null" } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 23, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", "targets": [ { - "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", + "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "", "refId": "A" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory", + "thresholds": "", + "title": "Running Kubelets", "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "shared": false }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "op": "=", + "text": "N/A", + "value": "null" } - ] + ], + "valueName": "min" }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { }, - "id": 24, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, + "id": 3, + "interval": null, "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", "targets": [ { - "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])", + "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", + "thresholds": "", + "title": "Running Pods", "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "shared": false }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "op": "=", + "text": "N/A", + "value": "null" } - ] + ], + "valueName": "min" }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { }, - "id": 25, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, + "id": 4, + "interval": null, "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", "targets": [ { - "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", + "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Goroutines", + "thresholds": "", + "title": "Running Container", "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" + "shared": false }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { - ] }, - "yaxes": [ + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "name": "value to text", + "value": 1 }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "name": "range to text", + "value": 2 } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Kubelet", - "uid": "3138fa155d5915769fbded898ac09fd9", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-kubelet - namespace: monitoring -- apiVersion: v1 - data: - namespace-by-pod.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "panels": [ - { - "collapse": false, - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "panels": [ - - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Current Bandwidth", - "titleSize": "h6", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 0, - "format": "time_series", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 1 - }, - "height": 9, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 12, - "nullPointMode": "connected", - "nullText": null, - "options": { - "fieldOptions": { - "calcs": [ - "last" ], - "defaults": { - "max": 10000000000, - "min": 0, - "title": "$namespace", - "unit": "Bps" - }, - "mappings": [ - + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } ], - "override": { - + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false }, - "thresholds": [ - { - "color": "dark-green", - "index": 0, - "value": null - }, + "tableColumn": "", + "targets": [ { - "color": "dark-yellow", - "index": 1, - "value": 5000000000 - }, + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Actual Volume Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ { - "color": "dark-red", - "index": 2, - "value": 7000000000 + "op": "=", + "text": "N/A", + "value": "null" } ], - "values": false - } - }, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "valueName": "min" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 12, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", - "format": "time_series", - "instant": null, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Received", - "type": "gauge", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 0, - "format": "time_series", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 1 - }, - "height": 9, - "id": 4, - "interval": null, - "links": [ + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + }, + "id": 6, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Desired Volume Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 12, - "nullPointMode": "connected", - "nullText": null, - "options": { - "fieldOptions": { - "calcs": [ - "last" + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" ], - "defaults": { - "max": 10000000000, - "min": 0, - "title": "$namespace", - "unit": "Bps" + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true }, - "mappings": [ - - ], - "override": { + "gridPos": { }, - "thresholds": [ + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ { - "color": "dark-green", - "index": 0, - "value": null + "name": "value to text", + "value": 1 }, { - "color": "dark-yellow", - "index": 1, - "value": 5000000000 - }, + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ { - "color": "dark-red", - "index": 2, - "value": 7000000000 + "from": "null", + "text": "N/A", + "to": "null" } ], - "values": false - } - }, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 12, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", - "format": "time_series", - "instant": null, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Transmitted", - "type": "gauge", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Config Error Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" } ], - "valueName": "current" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { - "columns": [ - { - "text": "Time", - "value": "Time" - }, - { - "text": "Value #A", - "value": "Value #A" - }, - { - "text": "Value #B", - "value": "Value #B" - }, - { - "text": "Value #C", - "value": "Value #C" - }, - { - "text": "Value #D", - "value": "Value #D" - }, - { - "text": "Value #E", - "value": "Value #E" - }, - { - "text": "Value #F", - "value": "Value #F" - }, - { - "text": "pod", - "value": "pod" - } - ], - "datasource": "$datasource", - "fill": 1, - "fontSize": "100%", - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 5, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "minSpan": 24, - "nullPointMode": "null as zero", - "renderer": "flot", - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 24, - "styles": [ + "collapse": false, + "collapsed": false, + "panels": [ { - "alias": "Time", - "colorMode": null, - "colors": [ + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Time", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Bandwidth Received", - "colorMode": null, - "colors": [ + }, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Bandwidth Transmitted", - "colorMode": null, - "colors": [ - + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", "thresholds": [ ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Rate of Received Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ + "timeFrom": null, + "timeShift": null, + "title": "Operation Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "pps" + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, { - "alias": "Rate of Transmitted Packets", - "colorMode": null, - "colors": [ + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Received Packets Dropped", - "colorMode": null, - "colors": [ + }, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets Dropped", - "colorMode": null, - "colors": [ - + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", "thresholds": [ ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ + "timeFrom": null, + "timeShift": null, + "title": "Operation Error Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Status", - "type": "table" - }, - { - "collapse": false, - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 6, - "panels": [ - ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Bandwidth", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { - "aliasColors": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 20 - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Receive Bandwidth", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 20 - }, - "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Transmit Bandwidth", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 9, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 30 }, "id": 10, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -19726,16 +18804,15 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" } ], "thresholds": [ @@ -19743,10 +18820,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "Operation duration 99th quantile", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -19761,23 +18838,36 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "pps", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -19786,37 +18876,30 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 30 + }, "id": 11, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -19826,17 +18909,23 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" } ], "thresholds": [ @@ -19844,10 +18933,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "Pod Start Rate", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -19862,7 +18951,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -19870,7 +18959,107 @@ items: "show": true }, { - "format": "pps", + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", "label": null, "logBase": 1, "max": null, @@ -19883,21 +19072,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Packets", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 12, + "collapse": false, + "collapsed": false, "panels": [ { "aliasColors": { @@ -19907,37 +19089,32 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 40 + }, "id": 13, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -19947,17 +19124,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" } ], "thresholds": [ @@ -19965,10 +19141,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "Storage Operation Rate", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -19983,7 +19159,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -19991,7 +19167,7 @@ items: "show": true }, { - "format": "pps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -20008,37 +19184,32 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 40 + }, "id": 14, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "hideEmpty": true, "hideZero": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -20048,17 +19219,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" } ], "thresholds": [ @@ -20066,10 +19236,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", + "title": "Storage Operation Error Rate", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -20084,7 +19254,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -20092,7 +19262,7 @@ items: "show": true }, { - "format": "pps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -20105,863 +19275,554 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Errors", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" - } - ], - "refresh": "10s", - "rows": [ - - ], - "schemaVersion": 18, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ + }, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "kube-system", - "value": "kube-system" - }, - "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "namespace", - "options": [ + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Duration 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "30s", - "value": "30s" + ] }, - { - "selected": true, - "text": "5m", - "value": "5m" + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + }, - { - "selected": false, - "text": "1h", - "value": "1h" - } - ], - "query": "30s,5m,1h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interval", - "options": [ - { - "selected": true, - "text": "4h", - "value": "4h" - } - ], - "query": "4h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Networking / Namespace (Pods)", - "uid": "8b7a8b326d7a6f1f04244066368c67af", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-namespace-by-pod - namespace: monitoring -- apiVersion: v1 - data: - namespace-by-workload.json: |- - { - "__inputs": [ + }, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "__requires": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "panels": [ - { - "collapse": false, - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "panels": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager operation rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Current Bandwidth", - "titleSize": "h6", - "type": "row" - }, - { - "aliasColors": { + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 1 - }, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "minSpan": 24, - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "spaceLength": 10, - "span": 24, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ workload }}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Received", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "series", - "name": null, - "show": false, - "values": [ - "current" - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 1 - }, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "minSpan": 24, - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "spaceLength": 10, - "span": 24, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ workload }}", - "refId": "A", - "step": 10 + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Transmitted", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "series", - "name": null, - "show": false, - "values": [ - "current" - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { - "columns": [ - { - "text": "Time", - "value": "Time" - }, - { - "text": "Value #A", - "value": "Value #A" - }, - { - "text": "Value #B", - "value": "Value #B" - }, - { - "text": "Value #C", - "value": "Value #C" - }, - { - "text": "Value #D", - "value": "Value #D" - }, - { - "text": "Value #E", - "value": "Value #E" - }, - { - "text": "Value #F", - "value": "Value #F" - }, - { - "text": "Value #G", - "value": "Value #G" - }, + "collapse": false, + "collapsed": false, + "panels": [ { - "text": "Value #H", - "value": "Value #H" - }, - { - "text": "workload", - "value": "workload" - } - ], - "datasource": "$datasource", - "fill": 1, - "fontSize": "90%", - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 5, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "minSpan": 24, - "nullPointMode": "null as zero", - "renderer": "flot", - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 24, - "styles": [ - { - "alias": "Time", - "colorMode": null, - "colors": [ + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Time", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Pod lifecycle event generator", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Current Bandwidth Received", - "colorMode": null, - "colors": [ + }, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Current Bandwidth Transmitted", - "colorMode": null, - "colors": [ - + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", "thresholds": [ ], - "type": "number", - "unit": "Bps" + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "alias": "Average Bandwidth Received", - "colorMode": null, - "colors": [ + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Average Bandwidth Transmitted", - "colorMode": null, - "colors": [ + }, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Rate of Received Packets", - "colorMode": null, - "colors": [ - + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", "thresholds": [ ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Received Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Workload", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", - "pattern": "workload", - "thresholds": [ + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist interval", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], - "timeFrom": null, - "timeShift": null, - "title": "Current Status", - "type": "table" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 6, + "collapse": false, + "collapsed": false, "panels": [ { "aliasColors": { }, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 20 + }, - "id": 7, + "id": 20, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [ ], - "minSpan": 24, "nullPointMode": "null", - "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -20971,17 +19832,16 @@ items: ], "spaceLength": 10, - "span": 24, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ workload }}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -20989,25 +19849,25 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Rate of Bytes Received", + "title": "PLEG relist duration", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, - "mode": "series", + "mode": "time", "name": null, - "show": false, + "show": true, "values": [ - "current" + ] }, "yaxes": [ { - "format": "Bps", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -21015,7 +19875,7 @@ items: "show": true }, { - "format": "Bps", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -21023,48 +19883,52 @@ items: "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { }, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 20 + }, - "id": 8, + "id": 21, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, + "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, - "lines": false, + "lines": true, "linewidth": 1, "links": [ ], - "minSpan": 24, "nullPointMode": "null", - "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -21074,17 +19938,37 @@ items: ], "spaceLength": 10, - "span": 24, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ workload }}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], "thresholds": [ @@ -21092,25 +19976,25 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Rate of Bytes Transmitted", + "title": "RPC Rate", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, - "mode": "series", + "mode": "time", "name": null, - "show": false, + "show": true, "values": [ - "current" + ] }, "yaxes": [ { - "format": "Bps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -21118,7 +20002,7 @@ items: "show": true }, { - "format": "Bps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -21131,284 +20015,153 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Average Bandwidth", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 9, "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{verb}} {{url}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Request duration 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Bandwidth HIstory", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { - "aliasColors": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 38 - }, - "id": 10, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Receive Bandwidth", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 38 - }, - "id": 11, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Transmit Bandwidth", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 39 - }, - "id": 12, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 13, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + }, + "id": 23, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -21418,17 +20171,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -21436,10 +20188,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "Memory", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -21454,19 +20206,19 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "pps", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -21479,21 +20231,16 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 40 + }, - "id": 14, + "id": 24, "legend": { "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -21503,13 +20250,11 @@ items: "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -21519,138 +20264,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Packets", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 40 - }, - "id": 15, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 16, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -21658,10 +20281,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "CPU usage", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -21676,7 +20299,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -21684,7 +20307,7 @@ items: "show": true }, { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -21701,21 +20324,16 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 41 + }, - "id": 17, + "id": 25, "legend": { "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -21725,13 +20343,11 @@ items: "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -21741,17 +20357,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -21759,10 +20374,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", + "title": "Goroutines", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -21777,19 +20392,19 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -21798,17 +20413,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Errors", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" } ], - "refresh": "10s", - "rows": [ - - ], - "schemaVersion": 18, + "schemaVersion": 14, "style": "dark", "tags": [ "kubernetes-mixin" @@ -21839,7 +20450,7 @@ items: "datasource": "$datasource", "hide": 2, "includeAll": false, - "label": null, + "label": "cluster", "multi": false, "name": "cluster", "options": [ @@ -21848,38 +20459,6 @@ items: "query": "label_values(kube_pod_info, cluster)", "refresh": 2, "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "kube-system", - "value": "kube-system" - }, - "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [ @@ -21891,28 +20470,22 @@ items: }, { "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", "current": { - "text": "deployment", - "value": "deployment" + }, "datasource": "$datasource", - "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", "hide": 0, - "includeAll": false, + "includeAll": true, "label": null, "multi": false, - "name": "type", + "name": "instance", "options": [ ], - "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, instance)", "refresh": 2, "regex": "", - "skipUrlSync": false, - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -21920,86 +20493,6 @@ items: "tagsQuery": "", "type": "query", "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "30s", - "value": "30s" - }, - { - "selected": true, - "text": "5m", - "value": "5m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - } - ], - "query": "30s,5m,1h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interval", - "options": [ - { - "selected": true, - "text": "4h", - "value": "4h" - } - ], - "query": "4h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false } ] }, @@ -22033,8 +20526,8 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Networking / Namespace (Workload)", - "uid": "bbb2a765a623ae38130206c7d94a160f", + "title": "Kubernetes / Kubelet", + "uid": "3138fa155d5915769fbded898ac09fd9", "version": 0 } kind: ConfigMap @@ -22044,657 +20537,2121 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-namespace-by-workload + name: grafana-dashboard-kubelet namespace: monitoring - apiVersion: v1 data: - node-cluster-rsrc-use.json: |- + namespace-by-pod.json: |- { + "__inputs": [ + + ], + "__requires": [ + + ], "annotations": { "list": [ - + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, + "id": null, "links": [ ], - "refresh": "10s", - "rows": [ + "panels": [ { "collapse": false, - "height": "250px", + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [ + { + "text": "Time", + "value": "Time" + }, + { + "text": "Value #A", + "value": "Value #A" + }, + { + "text": "Value #B", + "value": "Value #B" + }, + { + "text": "Value #C", + "value": "Value #C" + }, + { + "text": "Value #D", + "value": "Value #D" + }, + { + "text": "Value #E", + "value": "Value #E" + }, + { + "text": "Value #F", + "value": "Value #F" + }, + { + "text": "pod", + "value": "pod" + } + ], + "datasource": "$datasource", + "fill": 1, + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Status", + "type": "table" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 6, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 9, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 12, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Namespace (Pods)", + "uid": "8b7a8b326d7a6f1f04244066368c67af", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-namespace-by-pod + namespace: monitoring +- apiVersion: v1 + data: + namespace-by-workload.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "columns": [ + { + "text": "Time", + "value": "Time" + }, + { + "text": "Value #A", + "value": "Value #A" + }, + { + "text": "Value #B", + "value": "Value #B" + }, + { + "text": "Value #C", + "value": "Value #C" + }, + { + "text": "Value #D", + "value": "Value #D" + }, + { + "text": "Value #E", + "value": "Value #E" + }, + { + "text": "Value #F", + "value": "Value #F" + }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\"}\n*\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "text": "Value #G", + "value": "Value #G" }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + "text": "Value #H", + "value": "Value #H" + }, + { + "text": "workload", + "value": "workload" + } + ], + "datasource": "$datasource", + "fill": 1, + "fontSize": "90%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Saturation (load1 per CPU)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + "type": "hidden", + "unit": "short" + }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + "alias": "Current Bandwidth Received", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Bandwidth Transmitted", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "type": "number", + "unit": "Bps" }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + "alias": "Average Bandwidth Received", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Transmitted", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Saturation (Major Page Faults)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "rps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + "type": "number", + "unit": "Bps" + }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/ Receive/", - "stack": "A" - }, - { - "alias": "/ Transmit/", - "stack": "B", - "transform": "negative-Y" - } + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Receive", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - }, - { - "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Transmit", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Net Utilisation (Bytes Receive/Transmit)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "type": "number", + "unit": "pps" }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/ Receive/", - "stack": "A" - }, - { - "alias": "/ Transmit/", - "stack": "B", - "transform": "negative-Y" - } + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Receive", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - }, - { - "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Transmit", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Net Saturation (Drops Receive/Transmit)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ - ] - }, - "yaxes": [ - { - "format": "rps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" + "timeFrom": null, + "timeShift": null, + "title": "Current Status", + "type": "table" }, { - "collapse": false, - "height": "250px", + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 6, "panels": [ { "aliasColors": { }, - "bars": false, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 20 + }, "id": 7, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": true, - "linewidth": 0, + "lines": false, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 24, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\"}))\n", + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{device}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", + "intervalFactor": 1, + "legendFormat": "{{ workload }}", + "refId": "A", "step": 10 } ], @@ -22703,38 +22660,38 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Utilisation", + "title": "Average Rate of Bytes Received", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, - "mode": "time", + "mode": "series", "name": null, - "show": true, + "show": false, "values": [ - + "current" ] }, "yaxes": [ { - "format": "percentunit", + "format": "Bps", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, @@ -22742,45 +22699,62 @@ items: "aliasColors": { }, - "bars": false, + "bars": true, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, "id": 8, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, - "values": false + "values": true }, - "lines": true, - "linewidth": 0, + "lines": false, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 24, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\"}))\n", + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{device}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", + "intervalFactor": 1, + "legendFormat": "{{ workload }}", + "refId": "A", "step": 10 } ], @@ -22789,38 +22763,38 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Saturation", + "title": "Average Rate of Bytes Transmitted", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, - "mode": "time", + "mode": "series", "name": null, - "show": true, + "show": false, "values": [ - + "current" ] }, "yaxes": [ { - "format": "percentunit", + "format": "Bps", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] } @@ -22829,197 +22803,243 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Disk IO", - "titleSize": "h6" + "title": "Average Bandwidth", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum without (device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"} - node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n) \n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"})))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 9, + "panels": [ + ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Disk Space", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ + "title": "Bandwidth HIstory", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "USE Method / Cluster", - "uid": "", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-node-cluster-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - node-rsrc-use.json: |- - { - "annotations": { - "list": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "refresh": "10s", - "rows": [ + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, { - "collapse": false, - "height": "250px", + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 12, "panels": [ { "aliasColors": { @@ -23029,41 +23049,56 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 1, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 13, "legend": { + "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "show": false, + "rightSide": false, + "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null as zero", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Utilisation", - "legendLink": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", "step": 10 } ], @@ -23072,10 +23107,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -23090,7 +23125,7 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -23098,12 +23133,12 @@ items: "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, @@ -23115,41 +23150,56 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 14, "legend": { + "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "show": false, + "rightSide": false, + "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null as zero", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Saturation", - "legendLink": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", "step": 10 } ], @@ -23158,10 +23208,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Saturation (Load1 per CPU)", + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -23176,7 +23226,7 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -23184,12 +23234,12 @@ items: "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] } @@ -23198,12 +23248,20 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU", - "titleSize": "h6" + "title": "Packets", + "titleSize": "h6", + "type": "row" }, { - "collapse": false, - "height": "250px", + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 15, "panels": [ { "aliasColors": { @@ -23213,127 +23271,56 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Memory", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 16, "legend": { + "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "show": false, + "rightSide": false, + "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null as zero", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Major page faults", - "legendLink": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", "step": 10 } ], @@ -23342,10 +23329,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Saturation (Major Page Faults)", + "title": "Rate of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -23360,7 +23347,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -23368,27 +23355,15 @@ items: "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -23397,57 +23372,56 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 5, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 17, "legend": { + "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null as zero", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ - { - "alias": "/Receive/", - "stack": "A" - }, - { - "alias": "/Transmit/", - "stack": "B", - "transform": "negative-Y" - } + ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Receive", - "legendLink": null, - "step": 10 - }, - { - "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Transmit", - "legendLink": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", "step": 10 } ], @@ -23456,10 +23430,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Utilisation (Bytes Receive/Transmit)", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -23474,223 +23448,297 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" }, - { - "aliasColors": { + "hide": 0, + "label": null, + "name": "datasource", + "options": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Receive/", - "stack": "A" - }, - { - "alias": "/Transmit/", - "stack": "B", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Receive drops", - "legendLink": null, - "step": 10 - }, - { - "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Transmit drops", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Net Saturation (Drops Receive/Transmit)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ] - }, - "yaxes": [ - { - "format": "rps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Net", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "$datasource", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + { + "selected": true, + "text": "5m", + "value": "5m" }, - "lines": true, - "linewidth": 1, - "links": [ + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Namespace (Workload)", + "uid": "bbb2a765a623ae38130206c7d94a160f", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-namespace-by-workload + namespace: monitoring +- apiVersion: v1 + data: + node-cluster-rsrc-use.json: |- + { + "annotations": { + "list": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk IO Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -23699,8 +23747,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 8, + "fill": 10, + "id": 1, "legend": { "avg": false, "current": false, @@ -23711,7 +23759,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -23725,15 +23773,15 @@ items: ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "(\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\"}\n*\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\"}))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, + "legendFormat": "{{instance}}", + "legendLink": "/dashboard/file/node-rsrc-use.json", "step": 10 } ], @@ -23742,7 +23790,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Saturation", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -23763,7 +23811,7 @@ items: "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -23776,19 +23824,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk IO", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -23797,19 +23833,19 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 9, + "fill": 10, + "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": false, + "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -23822,16 +23858,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "1 -\n(\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n/\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n)\n", + "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, + "legendFormat": "{{instance}}", + "legendLink": "/dashboard/file/node-rsrc-use.json", "step": 10 } ], @@ -23840,7 +23876,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk Space Utilisation", + "title": "CPU Saturation (load1 per CPU)", "tooltip": { "shared": false, "sort": 0, @@ -23861,7 +23897,7 @@ items: "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -23880,171 +23916,42 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Disk Space", + "title": "CPU", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "instance", - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(up{job=\"node-exporter\"}, instance)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "USE Method / Node", - "uid": "", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-node-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - nodes.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ + }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 2, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], @@ -24054,12 +23961,12 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", + "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\"}))\n", "format": "time_series", - "interval": "$__rate_interval", - "intervalFactor": 5, - "legendFormat": "{{cpu}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -24067,9 +23974,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Memory Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -24093,12 +24000,12 @@ items: "show": true }, { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": 1, - "min": 0, - "show": true + "max": null, + "min": null, + "show": false } ] }, @@ -24110,70 +24017,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { - - }, - "id": 3, + "fill": 10, + "id": 4, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "1m load average", - "refId": "A" - }, - { - "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5m load average", - "refId": "B" - }, - { - "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "15m load average", - "refId": "C" - }, - { - "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", + "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "logical cores", - "refId": "D" + "legendFormat": "{{instance}}", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -24181,9 +24060,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Load Average", + "title": "Memory Saturation (Major Page Faults)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -24199,7 +24078,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "rps", "label": null, "logBase": 1, "max": null, @@ -24211,8 +24090,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -24220,14 +24099,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Memory", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -24237,70 +24115,58 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 4, + "fill": 10, + "id": 5, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ - + { + "alias": "/ Receive/", + "stack": "A" + }, + { + "alias": "/ Transmit/", + "stack": "B", + "transform": "negative-Y" + } ], "spaceLength": 10, - "span": 9, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory used", - "refId": "A" - }, - { - "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory buffers", - "refId": "B" - }, - { - "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "memory cached", - "refId": "C" + "legendFormat": "{{instance}} Receive", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 }, { - "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "memory free", - "refId": "D" + "legendFormat": "{{instance}} Transmit", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -24308,9 +24174,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Net Utilisation (Bytes Receive/Transmit)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -24319,124 +24185,30 @@ items: "buckets": null, "mode": "time", "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 5, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"})\n/\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Memory Usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + ] + }, { "aliasColors": { @@ -24445,73 +24217,58 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { - - }, + "fill": 10, "id": 6, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ { - "alias": "/ read| written/", - "yaxis": 1 + "alias": "/ Receive/", + "stack": "A" }, { - "alias": "/ io time/", - "yaxis": 2 + "alias": "/ Transmit/", + "stack": "B", + "transform": "negative-Y" } ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", - "format": "time_series", - "interval": "$__rate_interval", - "intervalFactor": 2, - "legendFormat": "{{device}} read", - "refId": "A" - }, - { - "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", + "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\"}", "format": "time_series", - "interval": "$__rate_interval", "intervalFactor": 2, - "legendFormat": "{{device}} written", - "refId": "B" + "legendFormat": "{{instance}} Receive", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 }, { - "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", + "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\"}", "format": "time_series", - "interval": "$__rate_interval", "intervalFactor": 2, - "legendFormat": "{{device}} io time", - "refId": "C" + "legendFormat": "{{instance}} Transmit", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -24519,9 +24276,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk I/O", + "title": "Net Saturation (Drops Receive/Transmit)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -24537,7 +24294,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "rps", "label": null, "logBase": 1, "max": null, @@ -24545,15 +24302,27 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -24562,44 +24331,29 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, + "fill": 10, "id": 7, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ - { - "alias": "used", - "color": "#E0B400" - }, - { - "alias": "available", - "color": "#73BF69" - } + ], "spaceLength": 10, "span": 6, @@ -24607,18 +24361,12 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "used", - "refId": "A" - }, - { - "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\"}))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "available", - "refId": "B" + "legendFormat": "{{instance}} {{device}}", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -24626,9 +24374,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk Space Usage", + "title": "Disk IO Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -24644,36 +24392,23 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -24682,50 +24417,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { - - }, + "fill": 10, "id": 8, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", + "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\"}))\n", "format": "time_series", - "interval": "$__rate_interval", "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" + "legendFormat": "{{instance}} {{device}}", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -24733,9 +24460,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Received", + "title": "Disk IO Saturation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -24751,23 +24478,35 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk IO", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -24776,50 +24515,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { - - }, + "fill": 10, "id": 9, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", + "expr": "sum without (device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"} - node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n) \n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"})))\n", "format": "time_series", - "interval": "$__rate_interval", "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" + "legendFormat": "{{instance}}", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -24827,9 +24558,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Transmitted", + "title": "Disk Space Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -24845,20 +24576,20 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -24866,10 +24597,9 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Disk Space", + "titleSize": "h6" } ], "schemaVersion": 14, @@ -24881,8 +24611,8 @@ items: "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -24894,32 +24624,6 @@ items: "refresh": 1, "regex": "", "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false } ] }, @@ -24952,8 +24656,9 @@ items: "30d" ] }, - "timezone": "browser", - "title": "Nodes", + "timezone": "utc", + "title": "USE Method / Cluster", + "uid": "", "version": 0 } kind: ConfigMap @@ -24963,28 +24668,21 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-nodes + name: grafana-dashboard-node-cluster-rsrc-use namespace: monitoring - apiVersion: v1 data: - persistentvolumesusage.json: |- + node-rsrc-use.json: |- { - "__inputs": [ - - ], - "__requires": [ - - ], "annotations": { "list": [ ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, - "id": null, "links": [ ], @@ -24992,204 +24690,7 @@ items: "rows": [ { "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 9, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used Space", - "refId": "A" - }, - { - "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Free Space", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Volume Space Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max without(instance,node) (\n(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Volume Space Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -25200,55 +24701,41 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 4, + "id": 1, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 9, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used inodes", - "refId": "A" - }, - { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "expr": "instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", - "intervalFactor": 1, - "legendFormat": " Free inodes", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "Utilisation", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -25256,7 +24743,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Volume inodes Usage", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -25274,7 +24761,7 @@ items: }, "yaxes": [ { - "format": "none", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -25282,795 +24769,915 @@ items: "show": true }, { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "aliasColors": { }, - "id": 5, - "interval": null, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max without(instance,node) (\nkubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", + "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "Saturation", + "legendLink": null, + "step": 10 } ], - "thresholds": "80, 90", - "title": "Volume inodes Usage", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1 per CPU)", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Memory", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "PersistentVolumeClaim", - "multi": false, - "name": "volume", - "options": [ - - ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ + { + "aliasColors": { - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-7d", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Persistent Volumes", - "uid": "919b92a8e8041bd567af9edab12c840c", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-persistentvolumesusage - namespace: monitoring -- apiVersion: v1 - data: - pod-total.json: |- - { - "__inputs": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "__requires": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Major page faults", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "panels": [ - { - "collapse": false, - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "panels": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Major Page Faults)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Current Bandwidth", - "titleSize": "h6", - "type": "row" + "title": "Memory", + "titleSize": "h6" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 0, - "format": "time_series", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 1 - }, - "height": 9, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "collapse": false, + "height": "250px", + "panels": [ { - "name": "value to text", - "value": 1 + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Receive/", + "stack": "A" + }, + { + "alias": "/Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Receive", + "legendLink": null, + "step": 10 + }, + { + "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmit", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Utilisation (Bytes Receive/Transmit)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 12, - "nullPointMode": "connected", - "nullText": null, - "options": { - "fieldOptions": { - "calcs": [ - "last" - ], - "defaults": { - "max": 10000000000, - "min": 0, - "title": "$namespace: $pod", - "unit": "Bps" + "aliasColors": { + }, - "mappings": [ + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], - "override": { + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Receive/", + "stack": "A" + }, + { + "alias": "/Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Receive drops", + "legendLink": null, + "step": 10 + }, + { + "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmit drops", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Saturation (Drops Receive/Transmit)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] }, - "thresholds": [ - { - "color": "dark-green", - "index": 0, - "value": null - }, + "yaxes": [ { - "color": "dark-yellow", - "index": 1, - "value": 5000000000 + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "color": "dark-red", - "index": 2, - "value": 7000000000 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "values": false - } - }, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 12, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", - "format": "time_series", - "instant": null, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Received", - "type": "gauge", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + ] } ], - "valueName": "current" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Net", + "titleSize": "h6" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 0, - "format": "time_series", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 1 - }, - "height": 9, - "id": 4, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "collapse": false, + "height": "250px", + "panels": [ { - "name": "value to text", - "value": 1 + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 12, - "nullPointMode": "connected", - "nullText": null, - "options": { - "fieldOptions": { - "calcs": [ - "last" + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "legendLink": null, + "step": 10 + } ], - "defaults": { - "max": 10000000000, - "min": 0, - "title": "$namespace: $pod", - "unit": "Bps" - }, - "mappings": [ + "thresholds": [ ], - "override": { + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] }, - "thresholds": [ - { - "color": "dark-green", - "index": 0, - "value": null - }, + "yaxes": [ { - "color": "dark-yellow", - "index": 1, - "value": 5000000000 + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "color": "dark-red", - "index": 2, - "value": 7000000000 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "values": false - } - }, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 12, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", - "format": "time_series", - "instant": null, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Transmitted", - "type": "gauge", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + ] } - ], - "valueName": "current" - }, - { - "collapse": false, - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 5, - "panels": [ - ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Bandwidth", - "titleSize": "h6", - "type": "row" + "title": "Disk IO", + "titleSize": "h6" }, { - "aliasColors": { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 11 - }, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 -\n(\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n/\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], - "thresholds": [ + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk Space", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Receive Bandwidth", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ - ] + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "instance", + "multi": false, + "name": "instance", + "options": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 11 - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + ], + "query": "label_values(up{job=\"node-exporter\"}, instance)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "USE Method / Node", + "uid": "", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-node-rsrc-use + namespace: monitoring +- apiVersion: v1 + data: + nodes.json: |- + { + "__inputs": [ - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ + ], + "__requires": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Transmit Bandwidth", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "annotations": { + "list": [ - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 8, + "collapse": false, + "collapsed": false, "panels": [ { "aliasColors": { @@ -26080,21 +25687,16 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 21 + }, - "id": 9, + "id": 2, "legend": { "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -26104,13 +25706,11 @@ items: "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -26120,17 +25720,16 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__rate_interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 5, + "legendFormat": "{{cpu}}", + "refId": "A" } ], "thresholds": [ @@ -26138,10 +25737,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "CPU Usage", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -26156,18 +25755,18 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, { - "format": "pps", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true } @@ -26181,21 +25780,16 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 0, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 21 + }, - "id": 10, + "id": 3, "legend": { "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -26205,13 +25799,11 @@ items: "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -26221,17 +25813,37 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "1m load average", + "refId": "A" + }, + { + "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5m load average", + "refId": "B" + }, + { + "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "15m load average", + "refId": "C" + }, + { + "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "logical cores", + "refId": "D" } ], "thresholds": [ @@ -26239,10 +25851,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "Load Average", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -26257,7 +25869,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -26265,7 +25877,7 @@ items: "show": true }, { - "format": "pps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -26278,21 +25890,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Packets", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 21 - }, - "id": 11, + "collapse": false, + "collapsed": false, "panels": [ { "aliasColors": { @@ -26302,21 +25907,16 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 32 + }, - "id": 12, + "id": 4, "legend": { "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -26326,13 +25926,11 @@ items: "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -26342,17 +25940,37 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 9, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "memory used", + "refId": "A" + }, + { + "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory buffers", + "refId": "B" + }, + { + "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory cached", + "refId": "C" + }, + { + "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory free", + "refId": "D" } ], "thresholds": [ @@ -26360,10 +25978,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "Memory Usage", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -26378,7 +25996,7 @@ items: }, "yaxes": [ { - "format": "pps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -26386,15 +26004,109 @@ items: "show": true }, { - "format": "pps", + "format": "bytes", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } - ] - }, + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"})\n/\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Memory Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -26403,21 +26115,16 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 0, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 32 + }, - "id": 13, + "id": 6, "legend": { "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -26427,352 +26134,209 @@ items: "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "/ read| written/", + "yaxis": 1 }, { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "/ io time/", + "yaxis": 2 } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Errors", - "titleSize": "h6", - "type": "row" - } - ], - "refresh": "10s", - "rows": [ - - ], - "schemaVersion": 18, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "kube-system", - "value": "kube-system" - }, - "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "pod", - "options": [ - - ], - "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "30s", - "value": "30s" - }, - { - "selected": true, - "text": "5m", - "value": "5m" + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} read", + "refId": "A" + }, + { + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} written", + "refId": "B" + }, + { + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} io time", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - { - "selected": false, - "text": "1h", - "value": "1h" - } - ], - "query": "30s,5m,1h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interval", - "options": [ - { - "selected": true, - "text": "4h", - "value": "4h" - } - ], - "query": "4h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Networking / Pod", - "uid": "7a18067ce943a40ae25454675c19ff5c", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-pod-total - namespace: monitoring -- apiVersion: v1 - data: - prometheus-remote-write.json: |- - { - "__inputs": [ + { + "aliasColors": { - ], - "__requires": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "annotations": { - "list": [ + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "used", + "color": "#E0B400" + }, + { + "alias": "available", + "color": "#73BF69" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "used", + "refId": "A" + }, + { + "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "available", + "refId": "B" + } + ], + "thresholds": [ - ], - "refresh": "60s", - "rows": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, { "collapse": false, "collapsed": false, @@ -26785,12 +26349,12 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { }, - "id": 2, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -26823,10 +26387,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)\n", + "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "legendFormat": "{{device}}", "refId": "A" } ], @@ -26835,7 +26399,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Highest Timestamp In vs. Highest Timestamp Sent", + "title": "Network Received", "tooltip": { "shared": true, "sort": 0, @@ -26853,23 +26417,248 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Transmitted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, "show": true } ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Nodes", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-nodes + namespace: monitoring +- apiVersion: v1 + data: + persistentvolumesusage.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -26883,18 +26672,18 @@ items: "gridPos": { }, - "id": 3, + "id": 2, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, "rightSide": false, "show": true, "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -26911,16 +26700,23 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 9, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n, 0)\n", + "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "intervalFactor": 1, + "legendFormat": "Used Space", "refId": "A" + }, + { + "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free Space", + "refId": "B" } ], "thresholds": [ @@ -26928,9 +26724,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate[5m]", + "title": "Volume Space Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -26946,29 +26742,113 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume Space Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Timestamps", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" }, @@ -26991,16 +26871,16 @@ items: }, "id": 4, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, "rightSide": false, "show": true, "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -27017,16 +26897,23 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 9, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n", + "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "intervalFactor": 1, + "legendFormat": "Used inodes", "refId": "A" + }, + { + "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": " Free inodes", + "refId": "B" } ], "thresholds": [ @@ -27034,9 +26921,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate, in vs. succeeded or dropped [5m]", + "title": "Volume inodes Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -27052,421 +26939,803 @@ items: }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume inodes Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" } - ] + ], + "valueName": "current" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Samples", + "showTitle": false, + "title": "Dashboard Row", "titleSize": "h6", "type": "row" - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "PersistentVolumeClaim", + "multi": false, + "name": "volume", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-persistentvolumesusage + namespace: monitoring +- apiVersion: v1 + data: + pod-total.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 5, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - ], - "minSpan": 6, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Current Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "override": { - ] }, - "yaxes": [ + "thresholds": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-green", + "index": 0, + "value": null }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Max Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "override": { - ] }, - "yaxes": [ + "thresholds": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-green", + "index": 0, + "value": null }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 } - ] - }, + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ { - "aliasColors": { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "panels": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Min Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { - }, - "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Desired Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Shards", - "titleSize": "h6", - "type": "row" + ] }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 8, "panels": [ { "aliasColors": { @@ -27476,16 +27745,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 2, "fillGradient": 0, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 21 }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -27495,11 +27769,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -27509,16 +27785,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -27526,10 +27803,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Shard Capacity", + "title": "Rate of Received Packets", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -27544,19 +27821,19 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -27569,16 +27846,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 2, "fillGradient": 0, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 21 }, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -27588,11 +27870,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -27602,16 +27886,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -27619,10 +27904,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pending Samples", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -27637,19 +27922,19 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -27659,13 +27944,20 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Shard Details", + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, "panels": [ { "aliasColors": { @@ -27675,16 +27967,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 2, "fillGradient": 0, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 32 }, - "id": 11, + "id": 12, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -27694,11 +27991,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -27708,16 +28007,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -27725,10 +28025,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "TSDB Current Segment", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -27743,19 +28043,19 @@ items: }, "yaxes": [ { - "format": "none", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -27768,16 +28068,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 2, "fillGradient": 0, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 32 }, - "id": 12, + "id": 13, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -27787,11 +28092,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -27801,16 +28108,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{consumer}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -27818,10 +28126,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Remote Write Current Segment", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -27836,19 +28144,19 @@ items: }, "yaxes": [ { - "format": "none", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -27858,107 +28166,282 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Segments", + "title": "Errors", "titleSize": "h6", "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + } + ], + "refresh": "10s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" }, - "id": 13, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false + { + "selected": true, + "text": "5m", + "value": "5m" }, - "lines": true, - "linewidth": 1, - "links": [ + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Pod", + "uid": "7a18067ce943a40ae25454675c19ff5c", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-pod-total + namespace: monitoring +- apiVersion: v1 + data: + prometheus-remote-write.json: |- + { + "__inputs": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Dropped Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "__requires": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, + ], + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "60s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -27972,7 +28455,7 @@ items: "gridPos": { }, - "id": 14, + "id": 2, "legend": { "alignAsTable": false, "avg": false, @@ -28000,12 +28483,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", @@ -28017,7 +28500,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Failed Samples", + "title": "Highest Timestamp In vs. Highest Timestamp Sent", "tooltip": { "shared": true, "sort": 0, @@ -28065,7 +28548,7 @@ items: "gridPos": { }, - "id": 15, + "id": 3, "legend": { "alignAsTable": false, "avg": false, @@ -28093,12 +28576,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n, 0)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", @@ -28110,7 +28593,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Retried Samples", + "title": "Rate[5m]", "tooltip": { "shared": true, "sort": 0, @@ -28144,7 +28627,20 @@ items: "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Timestamps", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -28158,7 +28654,7 @@ items: "gridPos": { }, - "id": 16, + "id": 4, "legend": { "alignAsTable": false, "avg": false, @@ -28186,12 +28682,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", @@ -28203,7 +28699,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Enqueue Retries", + "title": "Rate, in vs. succeeded or dropped [5m]", "tooltip": { "shared": true, "sort": 0, @@ -28243,192 +28739,108 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Misc. Rates", + "title": "Samples", "titleSize": "h6", "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "prometheus-mixin" - ], - "templating": { - "list": [ - { - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(prometheus_build_info, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "url", - "options": [ - - ], - "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Prometheus / Remote Write", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-prometheus-remote-write - namespace: monitoring -- apiVersion: v1 - data: - prometheus.json: |- - { - "annotations": { - "list": [ + ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "refresh": "60s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, { "aliasColors": { @@ -28438,13 +28850,20 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -28453,154 +28872,119 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, + "span": 4, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Count", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Uptime", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, + "targets": [ { - "alias": "Instance", - "colorMode": null, - "colors": [ + "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "instance", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Max Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - }, + ] + }, + "yaxes": [ { - "alias": "Job", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "job", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "alias": "Version", - "colorMode": null, - "colors": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "version", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "count by (job, instance, version) (prometheus_build_info{job=~\"$job\", instance=~\"$instance\"})", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})", - "format": "table", - "instant": true, + "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -28608,14 +28992,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Prometheus Stats", + "title": "Min Shards", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -28631,7 +29014,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -28640,22 +29023,10 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Prometheus Stats", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -28665,13 +29036,20 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -28680,26 +29058,26 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3", + "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{scrape_job}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -28707,9 +29085,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Target Sync", + "title": "Desired Shards", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -28725,11 +29103,11 @@ items: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -28738,10 +29116,23 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shards", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -28750,42 +29141,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 3, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})", + "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Targets", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -28793,9 +29191,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Targets", + "title": "Shard Capacity", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -28815,7 +29213,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -28824,22 +29222,10 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Discovery", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -28849,13 +29235,20 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -28864,26 +29257,26 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3", + "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{interval}} configured", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -28891,9 +29284,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Scrape Interval Duration", + "title": "Pending Samples", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -28909,11 +29302,11 @@ items: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -28922,10 +29315,23 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shard Details", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -28934,66 +29340,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 5, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 11, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "exceeded sample limit: {{job}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "duplicate timestamp: {{job}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "out of bounds: {{job}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))", + "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "out of order: {{job}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -29001,9 +29390,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Scrape failures", + "title": "TSDB Current Segment", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -29019,11 +29408,11 @@ items: }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -29032,7 +29421,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -29044,42 +29433,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 6, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 12, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])", + "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{instance}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{consumer}}", + "refId": "A" } ], "thresholds": [ @@ -29087,9 +29483,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Appended Samples", + "title": "Remote Write Current Segment", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -29105,11 +29501,11 @@ items: }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -29118,7 +29514,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -29127,12 +29523,13 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Retrieval", - "titleSize": "h6" + "title": "Segments", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -29142,42 +29539,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 13, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}", + "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{instance}} head series", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -29185,9 +29589,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Head Series", + "title": "Dropped Samples", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -29207,7 +29611,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -29216,7 +29620,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -29228,42 +29632,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 8, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 14, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}", + "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{instance}} head chunks", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -29271,9 +29682,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Head Chunks", + "title": "Failed Samples", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -29293,7 +29704,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -29302,22 +29713,10 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -29326,42 +29725,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 9, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 15, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])", + "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{instance}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -29369,9 +29775,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Query Rate", + "title": "Retried Samples", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -29391,7 +29797,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -29400,7 +29806,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -29412,42 +29818,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 10, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 16, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3", + "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{slice}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -29455,9 +29868,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Stage Duration", + "title": "Enqueue Retries", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -29473,11 +29886,11 @@ items: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -29486,7 +29899,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -29495,8 +29908,9 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query", - "titleSize": "h6" + "title": "Misc. Rates", + "titleSize": "h6", + "type": "row" } ], "schemaVersion": 14, @@ -29507,10 +29921,6 @@ items: "templating": { "list": [ { - "current": { - "text": "default", - "value": "default" - }, "hide": 0, "label": null, "name": "datasource", @@ -29523,25 +29933,67 @@ items: "type": "datasource" }, { - "allValue": ".+", + "allValue": null, "current": { - "selected": true, - "text": "All", - "value": "$__all" + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(prometheus_build_info, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } }, "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": "job", - "multi": true, - "name": "job", + "label": null, + "multi": false, + "name": "cluster", "options": [ ], - "query": "label_values(prometheus_build_info, job)", - "refresh": 1, + "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)", + "refresh": 2, "regex": "", - "sort": 2, + "sort": 0, "tagValuesQuery": "", "tags": [ @@ -29551,25 +30003,23 @@ items: "useTags": false }, { - "allValue": ".+", + "allValue": null, "current": { - "selected": true, - "text": "All", - "value": "$__all" + }, "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": "instance", - "multi": true, - "name": "instance", + "label": null, + "multi": false, + "name": "url", "options": [ ], - "query": "label_values(prometheus_build_info, instance)", - "refresh": 1, + "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)", + "refresh": 2, "regex": "", - "sort": 2, + "sort": 0, "tagValuesQuery": "", "tags": [ @@ -29581,7 +30031,7 @@ items: ] }, "time": { - "from": "now-1h", + "from": "now-6h", "to": "now" }, "timepicker": { @@ -29609,9 +30059,8 @@ items: "30d" ] }, - "timezone": "utc", - "title": "Prometheus / Overview", - "uid": "", + "timezone": "browser", + "title": "Prometheus / Remote Write", "version": 0 } kind: ConfigMap @@ -29621,172 +30070,202 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-prometheus + name: grafana-dashboard-prometheus-remote-write namespace: monitoring - apiVersion: v1 data: - proxy.json: |- + prometheus.json: |- { - "__inputs": [ - - ], - "__requires": [ - - ], "annotations": { "list": [ ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, - "id": null, "links": [ ], - "refresh": "10s", + "refresh": "60s", "rows": [ { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "aliasColors": { }, - "id": 2, - "interval": null, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ { - "name": "value to text", - "value": 1 + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "alias": "Count", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ + "alias": "Uptime", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "expr": "sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Up", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "alias": "Instance", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "instance", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, - { - "aliasColors": { + "alias": "Job", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "job", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "version", + "thresholds": [ - }, - "id": 3, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + ], + "type": "string", + "unit": "short" + } ], - "spaceLength": 10, - "span": 5, - "stack": false, - "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", - "format": "time_series", + "expr": "count by (job, instance, version) (prometheus_build_info{job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "rate", - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 } ], "thresholds": [ @@ -29794,13 +30273,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rules Sync Rate", + "title": "Prometheus Stats", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -29812,7 +30292,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -29820,15 +30300,27 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -29838,48 +30330,41 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 4, + "id": 2, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 5, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{scrape_job}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -29887,7 +30372,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rule Sync Latency 99th Quantile", + "title": "Target Sync", "tooltip": { "shared": false, "sort": 0, @@ -29905,7 +30390,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -29913,28 +30398,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -29943,49 +30415,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 5, + "fill": 10, + "id": 3, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "expr": "sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "rate", - "refId": "A" + "legendFormat": "Targets", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -29993,7 +30458,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Programming Rate", + "title": "Targets", "tooltip": { "shared": false, "sort": 0, @@ -30011,7 +30476,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -30019,15 +30484,27 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Discovery", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -30037,48 +30514,41 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 6, + "id": 4, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{interval}} configured", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -30086,7 +30556,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Programming Latency 99th Quantile", + "title": "Average Scrape Interval Duration", "tooltip": { "shared": false, "sort": 0, @@ -30104,7 +30574,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -30112,100 +30582,83 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 7, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" + "legendFormat": "exceeded sample limit: {{job}}", + "legendLink": null, + "step": 10 }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" + "legendFormat": "duplicate timestamp: {{job}}", + "legendLink": null, + "step": 10 }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" + "legendFormat": "out of bounds: {{job}}", + "legendLink": null, + "step": 10 }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" + "legendFormat": "out of order: {{job}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -30213,7 +30666,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Kube API Request Rate", + "title": "Scrape failures", "tooltip": { "shared": false, "sort": 0, @@ -30231,20 +30684,20 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] }, @@ -30256,49 +30709,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 8, + "fill": 10, + "id": 6, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 8, - "stack": false, + "span": 4, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", + "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", - "refId": "A" + "legendFormat": "{{job}} {{instance}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -30306,7 +30752,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Post Request Latency 99th Quantile", + "title": "Appended Samples", "tooltip": { "shared": false, "sort": 0, @@ -30324,7 +30770,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -30332,12 +30778,12 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -30345,14 +30791,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Retrieval", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -30362,49 +30807,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 9, + "fill": 10, + "id": 7, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", + "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", - "refId": "A" + "legendFormat": "{{job}} {{instance}} head series", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -30412,7 +30850,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Get Request Latency 99th Quantile", + "title": "Head Series", "tooltip": { "shared": false, "sort": 0, @@ -30430,7 +30868,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -30438,28 +30876,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -30468,49 +30893,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 10, + "fill": 10, + "id": 8, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", + "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{job}} {{instance}} head chunks", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -30518,7 +30936,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory", + "title": "Head Chunks", "tooltip": { "shared": false, "sort": 0, @@ -30536,23 +30954,35 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -30561,49 +30991,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 11, + "fill": 10, + "id": 9, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[5m])", + "expr": "rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{job}} {{instance}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -30611,7 +31034,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU usage", + "title": "Query Rate", "tooltip": { "shared": false, "sort": 0, @@ -30641,8 +31064,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] }, @@ -30654,49 +31077,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - - }, - "id": 12, + "fill": 10, + "id": 10, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", + "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{slice}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -30704,7 +31120,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Goroutines", + "title": "Stage Duration", "tooltip": { "shared": false, "sort": 0, @@ -30722,11 +31138,11 @@ items: }, "yaxes": [ { - "format": "short", + "format": "ms", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -30735,7 +31151,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] } @@ -30743,16 +31159,15 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Query", + "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ - "kubernetes-mixin" + "prometheus-mixin" ], "templating": { "list": [ @@ -30773,23 +31188,25 @@ items: "type": "datasource" }, { - "allValue": null, + "allValue": ".+", "current": { - + "selected": true, + "text": "All", + "value": "$__all" }, "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", "options": [ ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, + "query": "label_values(prometheus_build_info, job)", + "refresh": 1, "regex": "", - "sort": 1, + "sort": 2, "tagValuesQuery": "", "tags": [ @@ -30799,23 +31216,25 @@ items: "useTags": false }, { - "allValue": null, + "allValue": ".+", "current": { - + "selected": true, + "text": "All", + "value": "$__all" }, "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": null, - "multi": false, + "label": "instance", + "multi": true, "name": "instance", "options": [ ], - "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\"}, instance)", - "refresh": 2, + "query": "label_values(prometheus_build_info, instance)", + "refresh": 1, "regex": "", - "sort": 1, + "sort": 2, "tagValuesQuery": "", "tags": [ @@ -30855,9 +31274,9 @@ items: "30d" ] }, - "timezone": "UTC", - "title": "Kubernetes / Proxy", - "uid": "632e265de029684c40b21cb76bca4f94", + "timezone": "utc", + "title": "Prometheus / Overview", + "uid": "", "version": 0 } kind: ConfigMap @@ -30867,11 +31286,11 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-proxy + name: grafana-dashboard-prometheus namespace: monitoring - apiVersion: v1 data: - scheduler.json: |- + proxy.json: |- { "__inputs": [ @@ -30959,7 +31378,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})", + "expr": "sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -30975,13 +31394,212 @@ items: "valueFontSize": "80%", "valueMaps": [ { - "op": "=", - "text": "N/A", - "value": "null" + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "rate", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rules Sync Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rule Sync Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -30995,18 +31613,18 @@ items: "gridPos": { }, - "id": 3, + "id": 5, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -31023,37 +31641,16 @@ items: ], "spaceLength": 10, - "span": 5, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} e2e", + "legendFormat": "rate", "refId": "A" - }, - { - "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} binding", - "refId": "B" - }, - { - "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} scheduling algorithm", - "refId": "C" - }, - { - "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} volume", - "refId": "D" } ], "thresholds": [ @@ -31061,7 +31658,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Scheduling Rate", + "title": "Network Programming Rate", "tooltip": { "shared": false, "sort": 0, @@ -31109,7 +31706,7 @@ items: "gridPos": { }, - "id": 4, + "id": 6, "legend": { "alignAsTable": true, "avg": false, @@ -31137,37 +31734,16 @@ items: ], "spaceLength": 10, - "span": 5, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} e2e", + "legendFormat": "{{instance}}", "refId": "A" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} binding", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} scheduling algorithm", - "refId": "C" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} volume", - "refId": "D" } ], "thresholds": [ @@ -31175,7 +31751,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Scheduling latency 99th Quantile", + "title": "Network Programming Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, @@ -31236,7 +31812,7 @@ items: "gridPos": { }, - "id": 5, + "id": 7, "legend": { "alignAsTable": false, "avg": false, @@ -31269,28 +31845,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", @@ -31324,7 +31900,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -31332,7 +31908,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -31350,7 +31926,7 @@ items: "gridPos": { }, - "id": 6, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -31383,7 +31959,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", @@ -31456,7 +32032,7 @@ items: "gridPos": { }, - "id": 7, + "id": 9, "legend": { "alignAsTable": true, "avg": false, @@ -31489,7 +32065,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", @@ -31562,7 +32138,7 @@ items: "gridPos": { }, - "id": 8, + "id": 10, "legend": { "alignAsTable": false, "avg": false, @@ -31595,7 +32171,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -31655,7 +32231,7 @@ items: "gridPos": { }, - "id": 9, + "id": 11, "legend": { "alignAsTable": false, "avg": false, @@ -31688,7 +32264,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -31718,7 +32294,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -31726,7 +32302,7 @@ items: "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -31748,7 +32324,7 @@ items: "gridPos": { }, - "id": 10, + "id": 12, "legend": { "alignAsTable": false, "avg": false, @@ -31781,7 +32357,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}", + "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -31901,7 +32477,7 @@ items: "options": [ ], - "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\"}, instance)", + "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\"}, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -31945,8 +32521,8 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Scheduler", - "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", + "title": "Kubernetes / Proxy", + "uid": "632e265de029684c40b21cb76bca4f94", "version": 0 } kind: ConfigMap @@ -31956,120 +32532,37 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-scheduler - namespace: monitoring -- apiVersion: v1 - data: - statefulset.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 2, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "cores", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "CPU", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, + name: grafana-dashboard-proxy + namespace: monitoring +- apiVersion: v1 + data: + scheduler.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ { "cacheTimeout": null, "colorBackground": false, @@ -32091,7 +32584,7 @@ items: "gridPos": { }, - "id": 3, + "id": 2, "interval": null, "links": [ @@ -32110,7 +32603,7 @@ items: "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": "GB", + "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", @@ -32121,16 +32614,17 @@ items: "to": "null" } ], - "span": 4, + "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, "lineColor": "rgb(31, 120, 193)", - "show": true + "show": false }, "tableColumn": "", "targets": [ { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", + "expr": "sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -32138,7 +32632,7 @@ items: } ], "thresholds": "", - "title": "Memory", + "title": "Up", "tooltip": { "shared": false }, @@ -32147,94 +32641,239 @@ items: "valueMaps": [ { "op": "=", - "text": "0", + "text": "N/A", "value": "null" } ], - "valueName": "current" + "valueName": "min" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 4, - "interval": null, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} e2e", + "refId": "A" }, { - "name": "range to text", - "value": 2 + "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} binding", + "refId": "B" + }, + { + "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} scheduling algorithm", + "refId": "C" + }, + { + "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} volume", + "refId": "D" } ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "Bps", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scheduling Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "from": "null", - "text": "N/A", - "to": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": "rgb(31, 120, 193)", - "show": true + ] + }, + { + "aliasColors": { + }, - "tableColumn": "", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", + "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{instance}} e2e", "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} binding", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} scheduling algorithm", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} volume", + "refId": "D" } ], - "thresholds": "", - "title": "Network", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scheduling latency 99th Quantile", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, @@ -32248,357 +32887,519 @@ items: { "collapse": false, "collapsed": false, - "height": "100px", "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 5, - "interval": null, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" }, { - "name": "range to text", - "value": 2 + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Kube API Request Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "from": "null", - "text": "N/A", - "to": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + ] + }, + { + "aliasColors": { + }, - "tableColumn": "", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], - "thresholds": "", - "title": "Desired Replicas", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Post Request Latency 99th Quantile", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, { - "op": "=", - "text": "0", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 6, - "interval": null, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], - "thresholds": "", - "title": "Replicas of current version", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Get Request Latency 99th Quantile", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 7, - "interval": null, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "", - "title": "Observed Generation", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "valueName": "current" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 8, - "interval": null, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "", - "title": "Metadata Generation", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + ] + }, { "aliasColors": { @@ -32612,7 +33413,7 @@ items: "gridPos": { }, - "id": 9, + "id": 10, "legend": { "alignAsTable": false, "avg": false, @@ -32640,43 +33441,16 @@ items: ], "spaceLength": 10, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "replicas specified", + "legendFormat": "{{instance}}", "refId": "A" - }, - { - "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas created", - "refId": "B" - }, - { - "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "ready", - "refId": "C" - }, - { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas of current version", - "refId": "D" - }, - { - "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "updated", - "refId": "E" } ], "thresholds": [ @@ -32684,7 +33458,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Replicas", + "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, @@ -32766,33 +33540,7 @@ items: "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "query": "label_values(kube_pod_info, cluster)", "refresh": 2, "regex": "", "sort": 1, @@ -32811,14 +33559,14 @@ items: }, "datasource": "$datasource", "hide": 0, - "includeAll": false, - "label": "Name", + "includeAll": true, + "label": null, "multi": false, - "name": "statefulset", + "name": "instance", "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)", + "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\"}, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -32862,8 +33610,8 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / StatefulSets", - "uid": "a31c1f46e6f727cb37c0d731a7245005", + "title": "Kubernetes / Scheduler", + "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", "version": 0 } kind: ConfigMap @@ -32873,7 +33621,7 @@ items: app.kubernetes.io/name: grafana app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-statefulset + name: grafana-dashboard-scheduler namespace: monitoring - apiVersion: v1 data: diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index c69b637b..edb6623e 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -116,9 +116,6 @@ spec: - mountPath: /grafana-dashboard-definitions/0/scheduler name: grafana-dashboard-scheduler readOnly: false - - mountPath: /grafana-dashboard-definitions/0/statefulset - name: grafana-dashboard-statefulset - readOnly: false - mountPath: /grafana-dashboard-definitions/0/workload-total name: grafana-dashboard-workload-total readOnly: false @@ -201,9 +198,6 @@ spec: - configMap: name: grafana-dashboard-scheduler name: grafana-dashboard-scheduler - - configMap: - name: grafana-dashboard-statefulset - name: grafana-dashboard-statefulset - configMap: name: grafana-dashboard-workload-total name: grafana-dashboard-workload-total diff --git a/manifests/kube-state-metrics-prometheusRule.yaml b/manifests/kube-state-metrics-prometheusRule.yaml index 9b4541f6..824d8132 100644 --- a/manifests/kube-state-metrics-prometheusRule.yaml +++ b/manifests/kube-state-metrics-prometheusRule.yaml @@ -40,3 +40,26 @@ spec: for: 15m labels: severity: critical + - alert: KubeStateMetricsShardingMismatch + annotations: + description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. + runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricsshardingmismatch + summary: kube-state-metrics sharding is misconfigured. + expr: | + stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsShardsMissing + annotations: + description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. + runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricsshardsmissing + summary: kube-state-metrics shards are missing. + expr: | + 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1 + - + sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) + != 0 + for: 15m + labels: + severity: critical diff --git a/manifests/kubernetes-prometheusRule.yaml b/manifests/kubernetes-prometheusRule.yaml index fda06912..0434a676 100644 --- a/manifests/kubernetes-prometheusRule.yaml +++ b/manifests/kubernetes-prometheusRule.yaml @@ -699,26 +699,26 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d])) - ( ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) or vector(0) ) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d])) ) ) + # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) labels: verb: read record: apiserver_request:burnrate1d @@ -726,26 +726,26 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h])) - ( ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) or vector(0) ) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h])) ) ) + # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) labels: verb: read record: apiserver_request:burnrate1h @@ -753,26 +753,26 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h])) - ( ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) or vector(0) ) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h])) ) ) + # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) labels: verb: read record: apiserver_request:burnrate2h @@ -780,26 +780,26 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m])) - ( ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) or vector(0) ) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m])) ) ) + # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) labels: verb: read record: apiserver_request:burnrate30m @@ -807,26 +807,26 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d])) - ( ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) or vector(0) ) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d])) ) ) + # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) labels: verb: read record: apiserver_request:burnrate3d @@ -834,26 +834,26 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m])) - ( ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) or vector(0) ) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m])) ) ) + # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) labels: verb: read record: apiserver_request:burnrate5m @@ -861,26 +861,26 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h])) - ( ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) or vector(0) ) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h])) ) ) + # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) labels: verb: read record: apiserver_request:burnrate6h @@ -888,15 +888,15 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d])) ) + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) labels: verb: write record: apiserver_request:burnrate1d @@ -904,15 +904,15 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h])) ) + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) labels: verb: write record: apiserver_request:burnrate1h @@ -920,15 +920,15 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h])) ) + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) labels: verb: write record: apiserver_request:burnrate2h @@ -936,15 +936,15 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m])) ) + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) labels: verb: write record: apiserver_request:burnrate30m @@ -952,15 +952,15 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d])) ) + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) labels: verb: write record: apiserver_request:burnrate3d @@ -968,15 +968,15 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m])) ) + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write record: apiserver_request:burnrate5m @@ -984,36 +984,36 @@ spec: ( ( # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h])) + sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h])) ) + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) ) / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) + sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) labels: verb: write record: apiserver_request:burnrate6h - expr: | - sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) labels: verb: read record: code_resource:apiserver_request_total:rate5m - expr: | - sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write record: code_resource:apiserver_request_total:rate5m - expr: | - histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0 + histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0 labels: quantile: "0.99" verb: read record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 + histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 labels: quantile: "0.99" verb: write @@ -1040,56 +1040,56 @@ spec: 1 - ( ( # write too slow - sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) - - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) ) + ( # read too slow - sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d])) - ( ( - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) or vector(0) ) + - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) + - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d])) ) ) + # errors - sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) + sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) ) / - sum(code:apiserver_request_total:increase30d) + sum by (cluster) (code:apiserver_request_total:increase30d) labels: verb: all record: apiserver_request:availability30d - expr: | 1 - ( - sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d])) - ( # too slow ( - sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) or vector(0) ) + - sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) + - sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d])) ) + # errors - sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) + sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) ) / - sum(code:apiserver_request_total:increase30d{verb="read"}) + sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"}) labels: verb: read record: apiserver_request:availability30d @@ -1097,16 +1097,16 @@ spec: 1 - ( ( # too slow - sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) - - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) + sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) ) + # errors - sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) + sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) ) / - sum(code:apiserver_request_total:increase30d{verb="write"}) + sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"}) labels: verb: write record: apiserver_request:availability30d @@ -1114,84 +1114,84 @@ spec: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 record: code_verb:apiserver_request_total:increase30d - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h])) + sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h - expr: | - sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) + sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) labels: verb: read record: code:apiserver_request_total:increase30d - expr: | - sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) labels: verb: write record: code:apiserver_request_total:increase30d diff --git a/manifests/prometheus-prometheusRule.yaml b/manifests/prometheus-prometheusRule.yaml index 681219fb..51f857d6 100644 --- a/manifests/prometheus-prometheusRule.yaml +++ b/manifests/prometheus-prometheusRule.yaml @@ -211,6 +211,16 @@ spec: for: 15m labels: severity: warning + - alert: PrometheusLabelLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. + runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheuslabellimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit. + expr: | + increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 15m + labels: + severity: warning - alert: PrometheusErrorSendingAlertsToAnyAlertmanager annotations: description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.' diff --git a/manifests/setup/prometheus-operator-deployment.yaml b/manifests/setup/prometheus-operator-deployment.yaml index 23992a03..a87ecacc 100644 --- a/manifests/setup/prometheus-operator-deployment.yaml +++ b/manifests/setup/prometheus-operator-deployment.yaml @@ -17,6 +17,8 @@ spec: app.kubernetes.io/part-of: kube-prometheus template: metadata: + annotations: + kubectl.kubernetes.io/default-container: prometheus-operator labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator -- GitLab