diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 0009cf7e44b8a709ebf304d0d9624ef16bc7b637..b1d29d7623635696cabe6199744254f257e59e1e 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "jsonnet/kube-prometheus" } }, - "version": "bdf84bf1865e66d76c027eb2cea7bf776acd18a4" + "version": "90b8632fb37be04ff73542218d980ba54c53295b" }, { "name": "ksonnet", @@ -28,7 +28,7 @@ "subdir": "" } }, - "version": "af494738e1709998696ffbce9296063a20c80692" + "version": "d8f135ba007b4ec7ac58be9371042d19e1ae4dea" }, { "name": "grafonnet", @@ -78,7 +78,7 @@ "subdir": "Documentation/etcd-mixin" } }, - "version": "9ff762857712c9c5c2037e6ddd9a692a488224bf" + "version": "948e276ca73d3eb09391829d8ac317dbda8c07a1" } ] } diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 1a51fd48ca5ff9f6871012ba1b93d42594f9d45d..dea85646ff02cc139b7971a7eeb8b45911762365 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -2,26 +2,117 @@ apiVersion: v1 items: - apiVersion: v1 data: - k8s-cluster-rsrc-use.json: |- + apiserver.json: |- { + "__inputs": [ + + ], + "__requires": [ + + ], "annotations": { "list": [ ] }, - "editable": true, + "editable": false, "gnetId": null, "graphTooltip": 0, "hideControls": false, + "id": null, "links": [ ], - "refresh": "10s", + "refresh": "", "rows": [ { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{job=\"apiserver\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, { "aliasColors": { @@ -30,42 +121,68 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 1, + "fill": 1, + "gridPos": { + + }, + "id": 3, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 5, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:cluster_cpu_utilisation:ratio{cluster=\"$cluster\"}", + "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], "thresholds": [ @@ -73,7 +190,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "RPC Rate", "tooltip": { "shared": false, "sort": 0, @@ -91,20 +208,20 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "ops", "label": null, "logBase": 1, - "max": 1, - "min": 0, + "max": null, + "min": null, "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -116,42 +233,47 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 2, + "fill": 1, + "gridPos": { + + }, + "id": 4, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 5, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\"} / scalar(sum(min(kube_pod_info{cluster=\"$cluster\"}) by (node)))", + "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (verb, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "{{verb}}", + "refId": "A" } ], "thresholds": [ @@ -159,7 +281,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Saturation (Load1)", + "title": "Request duration 99th quantile", "tooltip": { "shared": false, "sort": 0, @@ -177,20 +299,20 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, - "max": 1, - "min": 0, + "max": null, + "min": null, "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -198,13 +320,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -214,42 +337,47 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 3, + "fill": 1, + "gridPos": { + + }, + "id": 5, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "show": true, + "rightSide": false, + "show": false, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:cluster_memory_utilisation:ratio{cluster=\"$cluster\"}", + "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "{{instance}} {{name}}", + "refId": "A" } ], "thresholds": [ @@ -257,7 +385,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Utilisation", + "title": "Work Queue Add Rate", "tooltip": { "shared": false, "sort": 0, @@ -275,20 +403,20 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "ops", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, @@ -300,42 +428,47 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 4, + "fill": 1, + "gridPos": { + + }, + "id": 6, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "show": true, + "rightSide": false, + "show": false, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\"}", + "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "{{instance}} {{name}}", + "refId": "A" } ], "thresholds": [ @@ -343,7 +476,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Saturation (Swap I/O)", + "title": "Work Queue Depth", "tooltip": { "shared": false, "sort": 0, @@ -361,7 +494,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -373,23 +506,11 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -398,42 +519,47 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 5, + "fill": 1, + "gridPos": { + + }, + "id": 7, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "{{instance}} {{name}}", + "refId": "A" } ], "thresholds": [ @@ -441,7 +567,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Utilisation", + "title": "Work Queue Latency", "tooltip": { "shared": false, "sort": 0, @@ -459,23 +585,36 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, - "max": 1, - "min": 0, + "max": null, + "min": null, "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -484,42 +623,47 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 6, + "fill": 1, + "gridPos": { + + }, + "id": 8, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -527,7 +671,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Saturation", + "title": "ETCD Cache Entry Total", "tooltip": { "shared": false, "sort": 0, @@ -545,10 +689,10 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -557,23 +701,11 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -582,42 +714,54 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "gridPos": { + + }, + "id": 9, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\"}", + "expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (intance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "{{instance}} hit", + "refId": "A" + }, + { + "expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} miss", + "refId": "B" } ], "thresholds": [ @@ -625,7 +769,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Utilisation (Transmitted)", + "title": "ETCD Cache Hit/Miss Rate", "tooltip": { "shared": false, "sort": 0, @@ -643,7 +787,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -651,12 +795,12 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, @@ -668,42 +812,54 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 8, + "fill": 1, + "gridPos": { + + }, + "id": 10, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\"}", + "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "{{instance}} get", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} miss", + "refId": "B" } ], "thresholds": [ @@ -711,7 +867,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Saturation (Dropped)", + "title": "ETCD Cache Duration 99th Quantile", "tooltip": { "shared": false, "sort": 0, @@ -729,7 +885,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -737,12 +893,12 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] } @@ -750,13 +906,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -766,42 +923,47 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 9, + "fill": 1, + "gridPos": { + + }, + "id": 11, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"} - node_filesystem_avail_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\"}\n", + "expr": "process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{node}}", - "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", - "step": 10 + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -809,7 +971,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk Capacity", + "title": "Memory", "tooltip": { "shared": false, "sort": 0, @@ -827,146 +989,25 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, - "max": 1, - "min": 0, + "max": null, + "min": null, "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Storage", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_node_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / USE Method / Cluster", - "uid": "a6e7d1362e1ddbb79db21d5bb40d7137", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-cluster-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - k8s-node-rsrc-use.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { + { + "aliasColors": { }, "bars": false, @@ -974,12 +1015,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 1, + "gridPos": { + + }, + "id": 12, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -989,26 +1035,26 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_cpu_utilisation:avg1m{cluster=\"$cluster\", node=\"$node\"}", + "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Utilisation", - "legendLink": null, - "step": 10 + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -1016,7 +1062,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, @@ -1034,7 +1080,7 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -1046,8 +1092,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, @@ -1060,12 +1106,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "gridPos": { + + }, + "id": 13, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -1075,26 +1126,26 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\", node=\"$node\"}", + "expr": "go_goroutines{job=\"apiserver\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Saturation", - "legendLink": null, - "step": 10 + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -1102,7 +1153,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Saturation (Load1)", + "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, @@ -1120,11 +1171,11 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -1133,7 +1184,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -1141,99 +1192,213 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node:node_memory_utilisation:{cluster=\"$cluster\", node=\"$node\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Memory", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "query": "label_values(apiserver_request_total{job=\"apiserver\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / API server", + "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-apiserver + namespace: monitoring +- apiVersion: v1 + data: + controller-manager.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "name": "range to text", + "value": 2 } - ] + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{job=\"kube-controller-manager\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" }, { "aliasColors": { @@ -1244,41 +1409,46 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "gridPos": { + + }, + "id": 3, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\", node=\"$node\"}", + "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Swap IO", - "legendLink": null, - "step": 10 + "legendFormat": "{{instance}} {{name}}", + "refId": "A" } ], "thresholds": [ @@ -1286,7 +1456,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Saturation (Swap I/O)", + "title": "Work Queue Add Rate", "tooltip": { "shared": false, "sort": 0, @@ -1304,20 +1474,20 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -1325,13 +1495,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Memory", - "titleSize": "h6" + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -1342,41 +1513,46 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 5, + "gridPos": { + + }, + "id": 4, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", + "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Utilisation", - "legendLink": null, - "step": 10 + "legendFormat": "{{instance}} {{name}}", + "refId": "A" } ], "thresholds": [ @@ -1384,7 +1560,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Utilisation", + "title": "Work Queue Depth", "tooltip": { "shared": false, "sort": 0, @@ -1402,7 +1578,7 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -1414,11 +1590,24 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -1428,41 +1617,46 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 6, + "gridPos": { + + }, + "id": 5, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Saturation", - "legendLink": null, - "step": 10 + "legendFormat": "{{instance}} {{name}}", + "refId": "A" } ], "thresholds": [ @@ -1470,7 +1664,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Saturation", + "title": "Work Queue Latency", "tooltip": { "shared": false, "sort": 0, @@ -1488,20 +1682,20 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -1509,13 +1703,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -1526,12 +1721,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 7, + "gridPos": { + + }, + "id": 6, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -1541,26 +1741,47 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Utilisation", - "legendLink": null, - "step": 10 + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], "thresholds": [ @@ -1568,7 +1789,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Utilisation (Transmitted)", + "title": "Kube API Request Rate", "tooltip": { "shared": false, "sort": 0, @@ -1586,20 +1807,20 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -1612,12 +1833,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 8, + "gridPos": { + + }, + "id": 7, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -1627,26 +1853,26 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 8, "stack": false, "steppedLine": false, "targets": [ { - "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Saturation", - "legendLink": null, - "step": 10 + "legendFormat": "{{verb}} {{url}}", + "refId": "A" } ], "thresholds": [ @@ -1654,7 +1880,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Saturation (Dropped)", + "title": "Post Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, @@ -1672,7 +1898,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -1680,12 +1906,12 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] } @@ -1693,13 +1919,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Net", - "titleSize": "h6" + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -1710,26 +1937,32 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 9, + "gridPos": { + + }, + "id": 8, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -1739,12 +1972,11 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\", node=\"$node\"}\n", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{verb}} {{url}}", + "refId": "A" } ], "thresholds": [ @@ -1752,7 +1984,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk Utilisation", + "title": "Get Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, @@ -1770,7 +2002,7 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -1778,12 +2010,12 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] } @@ -1791,206 +2023,78 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Disk", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { - ], - "query": "label_values(kube_node_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "node", - "multi": false, - "name": "node", - "options": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / USE Method / Node", - "uid": "4ac4f123aae0ff6dbaf4f4f66120033b", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-node-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-cluster.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } ], - "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "Memory", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2002,20 +2106,20 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -2028,13 +2132,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 2, + "gridPos": { + + }, + "id": 10, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2044,37 +2152,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(node:node_num_cpu:sum{cluster=\"$cluster\"})", + "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Requests Commitment", + "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2098,8 +2209,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, @@ -2112,13 +2223,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 3, + "gridPos": { + + }, + "id": 11, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2128,37 +2243,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(node:node_num_cpu:sum{cluster=\"$cluster\"})", + "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Limits Commitment", + "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2174,7 +2292,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -2183,66 +2301,189 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" }, - { - "aliasColors": { + "hide": 0, + "label": null, + "name": "datasource", + "options": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", + ], + "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Controller Manager", + "uid": "72e0e05bef5099e5f049b05fdc429ed4", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-controller-manager + namespace: monitoring +- apiVersion: v1 + data: + k8s-cluster-rsrc-use.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:cluster_cpu_utilisation:ratio{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Memory Utilisation", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2254,10 +2495,10 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -2279,9 +2520,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 5, + "fill": 10, + "id": 2, "legend": { "avg": false, "current": false, @@ -2292,7 +2532,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -2305,28 +2545,31 @@ items: ], "spaceLength": 10, - "span": 2, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", + "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\"} / scalar(sum(min(kube_pod_info{cluster=\"$cluster\"}) by (node)))", "format": "time_series", - "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Memory Requests Commitment", + "title": "CPU Saturation (Load1)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2338,10 +2581,10 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -2354,7 +2597,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -2363,9 +2618,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 6, + "fill": 10, + "id": 3, "legend": { "avg": false, "current": false, @@ -2376,7 +2630,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -2389,28 +2643,31 @@ items: ], "spaceLength": 10, - "span": 2, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", + "expr": "node:cluster_memory_utilisation:ratio{cluster=\"$cluster\"}", "format": "time_series", - "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Memory Limits Commitment", + "title": "Memory Utilisation", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2422,10 +2679,10 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -2438,19 +2695,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -2460,7 +2705,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 7, + "id": 4, "legend": { "avg": false, "current": false, @@ -2484,16 +2729,16 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{namespace}}", - "legendLink": null, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", "step": 10 } ], @@ -2502,7 +2747,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Memory Saturation (Swap I/O)", "tooltip": { "shared": false, "sort": 0, @@ -2520,7 +2765,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -2542,7 +2787,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU", + "title": "Memory", "titleSize": "h6" }, { @@ -2557,8 +2802,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 8, + "fill": 10, + "id": 5, "legend": { "avg": false, "current": false, @@ -2569,7 +2814,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -2582,238 +2827,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workloads", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to workloads", - "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", - "pattern": "namespace", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, + "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "G", + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", "step": 10 } ], @@ -2822,14 +2845,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Disk IO Utilisation", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2841,10 +2863,10 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -2857,19 +2879,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Quota", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -2879,7 +2889,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, + "id": 6, "legend": { "avg": false, "current": false, @@ -2903,16 +2913,16 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace)", + "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{namespace}}", - "legendLink": null, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", "step": 10 } ], @@ -2921,7 +2931,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "Disk IO Saturation", "tooltip": { "shared": false, "sort": 0, @@ -2939,10 +2949,10 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -2961,7 +2971,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory", + "title": "Disk", "titleSize": "h6" }, { @@ -2976,8 +2986,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 10, + "fill": 10, + "id": 7, "legend": { "avg": false, "current": false, @@ -2988,7 +2998,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -3001,238 +3011,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workloads", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to workloads", - "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", - "pattern": "namespace", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, + "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\"}", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "G", + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", "step": 10 } ], @@ -3241,14 +3029,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Requests by Namespace", + "title": "Net Utilisation (Transmitted)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -3260,7 +3047,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -3276,22 +3063,206 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory Requests", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Saturation (Dropped)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"} - node_filesystem_avail_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Capacity", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { "list": [ { "current": { @@ -3324,7 +3295,7 @@ items: "options": [ ], - "query": "label_values(node_cpu_seconds_total, cluster)", + "query": "label_values(kube_node_info, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -3368,17 +3339,17 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Compute Resources / Cluster", - "uid": "efa86fd1d0c121a26444b636a3f509a8", + "title": "Kubernetes / USE Method / Cluster", + "uid": "a6e7d1362e1ddbb79db21d5bb40d7137", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-k8s-resources-cluster + name: grafana-dashboard-k8s-cluster-rsrc-use namespace: monitoring - apiVersion: v1 data: - k8s-resources-namespace.json: |- + k8s-node-rsrc-use.json: |- { "annotations": { "list": [ @@ -3406,7 +3377,7 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, "id": 1, "legend": { "avg": false, @@ -3418,7 +3389,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -3431,15 +3402,15 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod_name)", + "expr": "node:node_cpu_utilisation:avg1m{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod_name}}", + "legendFormat": "Utilisation", "legendLink": null, "step": 10 } @@ -3449,7 +3420,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -3467,7 +3438,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -3483,19 +3454,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -3529,184 +3488,16 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, + "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "Saturation", + "legendLink": null, "step": 10 } ], @@ -3715,14 +3506,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "CPU Saturation (Load1)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -3734,7 +3524,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -3756,7 +3546,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "CPU", "titleSize": "h6" }, { @@ -3771,7 +3561,7 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, "id": 3, "legend": { "avg": false, @@ -3783,7 +3573,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -3796,15 +3586,15 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"}) by (pod_name)", + "expr": "node:node_memory_utilisation:{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod_name}}", + "legendFormat": "Memory", "legendLink": null, "step": 10 } @@ -3814,7 +3604,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "Memory Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -3832,7 +3622,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -3848,19 +3638,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -3894,265 +3672,384 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ + "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Swap IO", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Swap I/O)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "bytes" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "number", - "unit": "percentunit" - }, + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ + "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "bytes" - }, + ] + }, + "yaxes": [ { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Memory Usage (RSS)", - "colorMode": null, - "colors": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "type": "number", - "unit": "bytes" - }, + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ + "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "bytes" - }, + ] + }, + "yaxes": [ { - "alias": "Memory Usage (Swap", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", - "format": "table", - "instant": true, + "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "Utilisation", + "legendLink": null, "step": 10 - }, + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Utilisation (Transmitted)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ { - "expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", - "format": "table", - "instant": true, + "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "H", + "legendFormat": "Saturation", + "legendLink": null, "step": 10 } ], @@ -4161,14 +4058,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Net Saturation (Dropped)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -4180,7 +4076,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -4202,14 +4098,112 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Net", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\", node=\"$node\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" ], "templating": { "list": [ @@ -4244,7 +4238,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info, cluster)", + "query": "label_values(kube_node_info, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -4265,13 +4259,13 @@ items: "datasource": "$datasource", "hide": 0, "includeAll": false, - "label": "namespace", + "label": "node", "multi": false, - "name": "namespace", + "name": "node", "options": [ ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", "refresh": 1, "regex": "", "sort": 2, @@ -4315,17 +4309,17 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Compute Resources / Namespace (Pods)", - "uid": "85a562078cdf77779eaa1add43ccec1e", + "title": "Kubernetes / USE Method / Node", + "uid": "4ac4f123aae0ff6dbaf4f4f66120033b", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-k8s-resources-namespace + name: grafana-dashboard-k8s-node-rsrc-use namespace: monitoring - apiVersion: v1 data: - k8s-resources-pod.json: |- + k8s-resources-cluster.json: |- { "annotations": { "list": [ @@ -4343,7 +4337,7 @@ items: "rows": [ { "collapse": false, - "height": "250px", + "height": "100px", "panels": [ { "aliasColors": { @@ -4353,7 +4347,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, + "format": "percentunit", "id": 1, "legend": { "avg": false, @@ -4365,7 +4360,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -4378,31 +4373,28 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", cluster=\"$cluster\"}) by (container_name)", + "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{container_name}}", - "legendLink": null, - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -4430,19 +4422,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -4452,6 +4432,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "format": "percentunit", "id": 2, "legend": { "avg": false, @@ -4476,200 +4457,112 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 2, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Requests Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - }, + ] + }, + "yaxes": [ { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Container", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "container", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", - "format": "table", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "format": "time_series", "instant": true, "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "CPU Limits Commitment", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -4697,19 +4590,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Quota", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -4718,8 +4599,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 3, + "fill": 1, + "format": "percentunit", + "id": 4, "legend": { "avg": false, "current": false, @@ -4730,7 +4612,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -4743,47 +4625,28 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container_name}} (RSS)", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container_name}} (Cache)", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{container_name}} (Swap)", - "legendLink": null, - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Memory Utilisation", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -4795,7 +4658,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -4811,19 +4674,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -4833,7 +4684,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "format": "percentunit", + "id": 5, "legend": { "avg": false, "current": false, @@ -4857,146 +4709,333 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 2, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Requests Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, + ] + }, + "yaxes": [ { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "number", - "unit": "bytes" - }, + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Limits Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "percentunit" - }, + ] + }, + "yaxes": [ { - "alias": "Memory Usage (RSS)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "type": "number", - "unit": "bytes" - }, + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ { - "alias": "Memory Usage (Swap", - "colorMode": null, - "colors": [ + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to workloads", + "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", @@ -5004,15 +5043,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #H", + "pattern": "Value #C", "thresholds": [ ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Container", + "alias": "CPU Requests", "colorMode": null, "colors": [ @@ -5022,7 +5061,79 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "container", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", "thresholds": [ ], @@ -5047,7 +5158,7 @@ items: ], "targets": [ { - "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5056,7 +5167,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5065,7 +5176,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5074,7 +5185,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5083,7 +5194,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5092,7 +5203,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5101,22 +5212,13 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "G", "step": 10 - }, - { - "expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 } ], "thresholds": [ @@ -5124,7 +5226,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, @@ -5165,172 +5267,9 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "CPU Quota", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "pod", - "multi": false, - "name": "pod", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Pod", - "uid": "6581e46e4e5c7ba40a07646395ef7b23", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-pod - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-workload.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ + }, { "collapse": false, "height": "250px", @@ -5344,7 +5283,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 1, + "id": 9, "legend": { "avg": false, "current": false, @@ -5373,10 +5312,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } @@ -5386,7 +5325,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 0, @@ -5404,7 +5343,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -5426,7 +5365,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Memory", "titleSize": "h6" }, { @@ -5442,7 +5381,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "id": 10, "legend": { "avg": false, "current": false, @@ -5477,7 +5416,43 @@ items: "type": "hidden" }, { - "alias": "CPU Usage", + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to workloads", + "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", "colorMode": null, "colors": [ @@ -5487,15 +5462,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #A", + "pattern": "Value #C", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests", + "alias": "Memory Requests", "colorMode": null, "colors": [ @@ -5505,15 +5480,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #B", + "pattern": "Value #D", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests %", + "alias": "Memory Requests %", "colorMode": null, "colors": [ @@ -5523,7 +5498,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #E", "thresholds": [ ], @@ -5531,7 +5506,7 @@ items: "unit": "percentunit" }, { - "alias": "CPU Limits", + "alias": "Memory Limits", "colorMode": null, "colors": [ @@ -5541,15 +5516,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #F", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Limits %", + "alias": "Memory Limits %", "colorMode": null, "colors": [ @@ -5559,7 +5534,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #G", "thresholds": [ ], @@ -5567,7 +5542,7 @@ items: "unit": "percentunit" }, { - "alias": "Pod", + "alias": "Namespace", "colorMode": null, "colors": [ @@ -5575,9 +5550,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down", - "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", "thresholds": [ ], @@ -5602,7 +5577,7 @@ items: ], "targets": [ { - "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5611,7 +5586,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5620,7 +5595,7 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5629,7 +5604,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5638,13 +5613,31 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 } ], "thresholds": [ @@ -5652,7 +5645,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Requests by Namespace", "tooltip": { "shared": false, "sort": 0, @@ -5693,23 +5686,132 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Memory Requests", "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(node_cpu_seconds_total, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-cluster + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 1, "legend": { "avg": false, "current": false, @@ -5738,7 +5840,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -5751,7 +5853,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -5769,7 +5871,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -5791,7 +5893,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "CPU Usage", "titleSize": "h6" }, { @@ -5807,7 +5909,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 2, "legend": { "avg": false, "current": false, @@ -5842,7 +5944,7 @@ items: "type": "hidden" }, { - "alias": "Memory Usage", + "alias": "CPU Usage", "colorMode": null, "colors": [ @@ -5857,10 +5959,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests", + "alias": "CPU Requests", "colorMode": null, "colors": [ @@ -5875,10 +5977,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests %", + "alias": "CPU Requests %", "colorMode": null, "colors": [ @@ -5896,7 +5998,7 @@ items: "unit": "percentunit" }, { - "alias": "Memory Limits", + "alias": "CPU Limits", "colorMode": null, "colors": [ @@ -5911,10 +6013,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Limits %", + "alias": "CPU Limits %", "colorMode": null, "colors": [ @@ -5967,7 +6069,7 @@ items: ], "targets": [ { - "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5976,7 +6078,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5985,7 +6087,7 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -5994,7 +6096,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6003,7 +6105,7 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6017,7 +6119,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, @@ -6058,233 +6160,43 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "CPU Quota", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "workload", - "multi": false, - "name": "workload", - "options": [ - - ], - "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "type", - "multi": false, - "name": "type", - "options": [ - - ], - "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Workload", - "uid": "a164a7f0339f99e89cea5cb47e9be617", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-workload - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-workloads-namespace.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], "spaceLength": 10, @@ -6293,10 +6205,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -6306,7 +6218,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 0, @@ -6324,7 +6236,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -6346,7 +6258,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Memory Usage", "titleSize": "h6" }, { @@ -6362,7 +6274,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "id": 4, "legend": { "avg": false, "current": false, @@ -6397,13 +6309,13 @@ items: "type": "hidden" }, { - "alias": "Running Pods", + "alias": "Memory Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, + "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", @@ -6412,10 +6324,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Usage", + "alias": "Memory Requests", "colorMode": null, "colors": [ @@ -6430,10 +6342,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests", + "alias": "Memory Requests %", "colorMode": null, "colors": [ @@ -6448,10 +6360,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "percentunit" }, { - "alias": "CPU Requests %", + "alias": "Memory Limits", "colorMode": null, "colors": [ @@ -6466,10 +6378,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "bytes" }, { - "alias": "CPU Limits", + "alias": "Memory Limits %", "colorMode": null, "colors": [ @@ -6484,10 +6396,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "percentunit" }, { - "alias": "CPU Limits %", + "alias": "Memory Usage (RSS)", "colorMode": null, "colors": [ @@ -6502,28 +6414,28 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "bytes" }, { - "alias": "Workload", + "alias": "Memory Usage (Cache)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": true, + "link": false, "linkTooltip": "Drill down", - "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", + "linkUrl": "", + "pattern": "Value #G", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "Workload Type", + "alias": "Memory Usage (Swap", "colorMode": null, "colors": [ @@ -6533,7 +6445,25 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "workload_type", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -6558,7 +6488,7 @@ items: ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6567,7 +6497,7 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6576,7 +6506,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6585,7 +6515,7 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6594,7 +6524,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6603,13 +6533,31 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ @@ -6617,7 +6565,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Memory Quota", "tooltip": { "shared": false, "sort": 0, @@ -6658,23 +6606,159 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Memory Quota", "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 3, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-namespace + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-pod.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, "legend": { "avg": false, "current": false, @@ -6703,10 +6787,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } @@ -6716,7 +6800,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -6734,7 +6818,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -6756,7 +6840,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "CPU Usage", "titleSize": "h6" }, { @@ -6772,7 +6856,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 2, "legend": { "avg": false, "current": false, @@ -6807,13 +6891,13 @@ items: "type": "hidden" }, { - "alias": "Running Pods", + "alias": "CPU Usage", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, + "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", @@ -6825,7 +6909,7 @@ items: "unit": "short" }, { - "alias": "Memory Usage", + "alias": "CPU Requests", "colorMode": null, "colors": [ @@ -6840,10 +6924,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests", + "alias": "CPU Requests %", "colorMode": null, "colors": [ @@ -6858,10 +6942,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "percentunit" }, { - "alias": "Memory Requests %", + "alias": "CPU Limits", "colorMode": null, "colors": [ @@ -6876,10 +6960,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "short" }, { - "alias": "Memory Limits", + "alias": "CPU Limits %", "colorMode": null, "colors": [ @@ -6892,48 +6976,12 @@ items: "pattern": "Value #E", "thresholds": [ - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - ], "type": "number", "unit": "percentunit" }, { - "alias": "Workload", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workload Type", + "alias": "Container", "colorMode": null, "colors": [ @@ -6943,7 +6991,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "workload_type", + "pattern": "container", "thresholds": [ ], @@ -6968,7 +7016,7 @@ items: ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6977,7 +7025,7 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6986,7 +7034,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6995,7 +7043,7 @@ items: "step": 10 }, { - "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7004,22 +7052,13 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - }, - { - "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 } ], "thresholds": [ @@ -7027,7 +7066,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, @@ -7068,155 +7107,12 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "CPU Quota", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Namespace (Workloads)", - "uid": "a87fb0d919ec0ea5f6543124e16c42a5", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-workloads-namespace - namespace: monitoring -- apiVersion: v1 - data: - nodes.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ + }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -7226,68 +7122,58 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 2, + "fill": 10, + "id": 3, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(node_load1{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "load 1m", - "refId": "A" - }, - { - "expr": "max(node_load5{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "load 5m", - "refId": "B" + "legendFormat": "{{container}} (RSS)", + "legendLink": null, + "step": 10 }, { - "expr": "max(node_load15{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "load 15m", - "refId": "C" + "legendFormat": "{{container}} (Cache)", + "legendLink": null, + "step": 10 }, { - "expr": "count(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", mode=\"user\"})", + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "logical cores", - "refId": "D" + "legendFormat": "{{container}} (Swap)", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -7295,7 +7181,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "System load", + "title": "Memory Usage", "tooltip": { "shared": false, "sort": 0, @@ -7313,11 +7199,11 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -7326,10 +7212,22 @@ items: "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -7339,17 +7237,12 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, - "id": 3, + "id": 4, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false @@ -7359,339 +7252,275 @@ items: "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cpu}}", - "refId": "A" - } - ], - "thresholds": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Usage Per Core", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ], + "type": "number", + "unit": "bytes" }, { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ - }, - "id": 4, - "legend": { - "alignAsTable": "true", - "avg": "true", - "current": "true", - "max": "false", - "min": "false", - "rightSide": "true", - "show": "true", - "total": "false", - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ - ], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ + ], + "type": "number", + "unit": "percentunit" + }, { - "expr": "max (sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m])) ) * 100\n", - "format": "time_series", - "intervalFactor": 10, - "legendFormat": "{{ cpu }}", - "refId": "A" - } - ], - "thresholds": [ + "alias": "Memory Limits", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilization", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ - ] - }, - "yaxes": [ + ], + "type": "number", + "unit": "bytes" + }, { - "format": "percent", - "label": null, - "logBase": 1, - "max": 100, - "min": 0, - "show": true + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" }, { - "format": "percent", - "label": null, - "logBase": 1, - "max": 100, - "min": 0, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ - }, - "id": 5, - "interval": null, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ - ], - "mappingType": 1, - "mappingTypes": [ + ], + "type": "number", + "unit": "bytes" + }, { - "name": "value to text", - "value": 1 + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "alias": "Memory Usage (Swap", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "CPU Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + "alias": "Container", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ - }, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + ], + "type": "string", + "unit": "short" + } ], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, "targets": [ { - "expr": "max(\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", - "format": "time_series", + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "memory used", - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 }, { - "expr": "max(node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", - "format": "time_series", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "memory buffers", - "refId": "B" + "legendFormat": "", + "refId": "B", + "step": 10 }, { - "expr": "max(node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", - "format": "time_series", + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "memory cached", - "refId": "C" + "legendFormat": "", + "refId": "C", + "step": 10 }, { - "expr": "max(node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", - "format": "time_series", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "memory free", - "refId": "D" + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ @@ -7699,13 +7528,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Memory Quota", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -7717,119 +7547,197 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 7, - "interval": null, - "links": [ + "hide": 0, + "label": null, + "name": "datasource", + "options": [ - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(\n (\n (\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Memory Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "pod", + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-pod + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-workload.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -7839,68 +7747,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 8, + "fill": 10, + "id": 1, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ - { - "alias": "read", - "yaxis": 1 - }, - { - "alias": "io time", - "yaxis": 2 - } + ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(rate(node_disk_read_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "read", - "refId": "A" - }, - { - "expr": "max(rate(node_disk_written_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "written", - "refId": "B" - }, - { - "expr": "max(rate(node_disk_io_time_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "sum(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "io time", - "refId": "C" + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -7908,7 +7790,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk I/O", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -7926,23 +7808,35 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -7952,17 +7846,12 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, - "id": 9, + "id": 2, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false @@ -7972,33 +7861,194 @@ items: "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], "targets": [ { - "expr": "max by (namespace, pod, device) ((node_filesystem_size_bytes{cluster=\"$cluster\", fstype=~\"ext[234]|btrfs|xfs|zfs\", instance=\"$instance\", job=\"node-exporter\"} - node_filesystem_avail_bytes{cluster=\"$cluster\", fstype=~\"ext[234]|btrfs|xfs|zfs\", instance=\"$instance\", job=\"node-exporter\"}) / node_filesystem_size_bytes{cluster=\"$cluster\", fstype=~\"ext[234]|btrfs|xfs|zfs\", instance=\"$instance\", job=\"node-exporter\"})", - "format": "time_series", + "expr": "sum(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "disk used", - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 }, { - "expr": "max by (namespace, pod, device) (node_filesystem_avail_bytes{cluster=\"$cluster\", fstype=~\"ext[234]|btrfs|xfs|zfs\", instance=\"$instance\", job=\"node-exporter\"} / node_filesystem_size_bytes{cluster=\"$cluster\", fstype=~\"ext[234]|btrfs|xfs|zfs\", instance=\"$instance\", job=\"node-exporter\"})", - "format": "time_series", + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "disk free", - "refId": "B" + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 } ], "thresholds": [ @@ -8006,13 +8056,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk Space Usage", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -8024,20 +8075,20 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] } @@ -8045,14 +8096,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -8062,47 +8112,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 10, + "fill": 10, + "id": 3, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(rate(node_network_receive_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m]))", + "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -8110,7 +8155,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Received", + "title": "Memory Usage", "tooltip": { "shared": false, "sort": 0, @@ -8132,39 +8177,46 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, - { - "aliasColors": { - - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, - "id": 11, + "id": 4, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false @@ -8174,26 +8226,194 @@ items: "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], "targets": [ { - "expr": "max(rate(node_network_transmit_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m]))", - "format": "time_series", + "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 } ], "thresholds": [ @@ -8201,13 +8421,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Transmitted", + "title": "Memory Quota", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -8219,20 +8440,20 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] } @@ -8240,71 +8461,248 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ - }, - "id": 12, - "legend": { - "alignAsTable": false, + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "workload", + "multi": false, + "name": "workload", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "type", + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Workload", + "uid": "a164a7f0339f99e89cea5cb47e9be617", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-workload + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-workloads-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 9, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "inodes used", - "refId": "A" - }, - { - "expr": "max(node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "expr": "sum(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inodes free", - "refId": "B" + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -8312,7 +8710,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Inodes Usage", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -8334,7 +8732,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -8343,250 +8741,24 @@ items: "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 13, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(\n (\n (\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Inodes Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(node_boot_time_seconds{cluster=\"$cluster\", job=\"node-exporter\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Nodes", - "uid": "fa49a4706d07a042595b664c87fb33ea", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-nodes - namespace: monitoring -- apiVersion: v1 - data: - persistentvolumesusage.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + "aliasColors": { }, "bars": false, @@ -8594,67 +8766,269 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, "id": 2, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, + "avg": false, + "current": false, + "max": false, + "min": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 9, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used Space", - "refId": "A" + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Free Space", - "refId": "B" - } - ], - "thresholds": [ + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Volume Space Usage", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -8666,7 +9040,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -8674,168 +9048,71 @@ items: "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Volume Space Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - + "aliasColors": { + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 4, + "fill": 10, + "id": 3, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, + "avg": false, + "current": false, + "max": false, + "min": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 9, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used inodes", - "refId": "A" - }, - { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", "format": "time_series", - "intervalFactor": 1, - "legendFormat": " Free inodes", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -8843,7 +9120,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Volume inodes Usage", + "title": "Memory Usage", "tooltip": { "shared": false, "sort": 0, @@ -8861,7 +9138,7 @@ items: }, "yaxes": [ { - "format": "none", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -8869,107 +9146,334 @@ items: "show": true }, { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - + "total": false, + "values": false }, - "id": 5, - "interval": null, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ { - "name": "value to text", - "value": 1 + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Volume inodes Usage", + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" } ], "schemaVersion": 14, @@ -8998,7 +9502,8 @@ items: { "allValue": null, "current": { - + "text": "prod", + "value": "prod" }, "datasource": "$datasource", "hide": 2, @@ -9009,10 +9514,10 @@ items: "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", - "refresh": 2, + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, "regex": "", - "sort": 0, + "sort": 2, "tagValuesQuery": "", "tags": [ @@ -9024,47 +9529,22 @@ items: { "allValue": null, "current": { - + "text": "prod", + "value": "prod" }, "datasource": "$datasource", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "namespace", "multi": false, "name": "namespace", "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "PersistentVolumeClaim", - "multi": false, - "name": "volume", - "options": [ - - ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", - "refresh": 2, + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, "regex": "", - "sort": 0, + "sort": 2, "tagValuesQuery": "", "tags": [ @@ -9076,7 +9556,7 @@ items: ] }, "time": { - "from": "now-7d", + "from": "now-1h", "to": "now" }, "timepicker": { @@ -9105,17 +9585,17 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Persistent Volumes", - "uid": "919b92a8e8041bd567af9edab12c840c", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-persistentvolumesusage + name: grafana-dashboard-k8s-resources-workloads-namespace namespace: monitoring - apiVersion: v1 data: - pods.json: |- + kubelet.json: |- { "__inputs": [ @@ -9125,20 +9605,7 @@ items: ], "annotations": { "list": [ - { - "builtIn": 1, - "datasource": "$datasource", - "enable": true, - "expr": "time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m]) > 0)", - "hide": false, - "iconColor": "rgba(215, 44, 44, 1)", - "name": "Restarts", - "showIn": 0, - "tags": [ - "restart" - ], - "type": "rows" - } + ] }, "editable": false, @@ -9148,9 +9615,6784 @@ items: "id": null, "links": [ - ], - "refresh": "", - "rows": [ + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{job=\"kubelet\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kubelet_running_pod_count{job=\"kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Running Pods", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kubelet_running_container_count{job=\"kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Running Container", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Actual Volume Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 6, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Desired Volume Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(kubelet_node_config_error{job=\"kubelet\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Config Error Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_runtime_operations_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_runtime_operations_errors_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation Error Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 10, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation duration 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 12, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 13, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(storage_operation_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 14, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(storage_operation_errors_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Error Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 15, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": true, + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Duration 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 16, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager operation rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 17, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Pod lifecycle event generator", + "fill": 1, + "gridPos": { + + }, + "id": 18, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 19, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist interval", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 20, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 21, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "RPC Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 22, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{verb}} {{url}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Request duration 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 23, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{job=\"kubelet\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 24, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{job=\"kubelet\",instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{job=\"kubelet\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(kubelet_runtime_operations{job=\"kubelet\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Kubelet", + "uid": "3138fa155d5915769fbded898ac09fd9", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-kubelet + namespace: monitoring +- apiVersion: v1 + data: + nodes.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(node_load1{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "load 1m", + "refId": "A" + }, + { + "expr": "max(node_load5{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "load 5m", + "refId": "B" + }, + { + "expr": "max(node_load15{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "load 15m", + "refId": "C" + }, + { + "expr": "count(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", mode=\"user\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "logical cores", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "System load", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cpu}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Usage Per Core", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": "true", + "avg": "true", + "current": "true", + "max": "false", + "min": "false", + "rightSide": "true", + "show": "true", + "total": "false", + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max (sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m])) ) * 100\n", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "{{ cpu }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "CPU Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory used", + "refId": "A" + }, + { + "expr": "max(node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory buffers", + "refId": "B" + }, + { + "expr": "max(node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory cached", + "refId": "C" + }, + { + "expr": "max(node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory free", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(\n (\n (\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Memory Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "read", + "yaxis": 1 + }, + { + "alias": "io time", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(rate(node_disk_read_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "read", + "refId": "A" + }, + { + "expr": "max(rate(node_disk_written_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "written", + "refId": "B" + }, + { + "expr": "max(rate(node_disk_io_time_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "io time", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} disk used", + "refId": "A" + }, + { + "expr": "node:node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} disk free", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inodes used", + "refId": "A" + }, + { + "expr": "max(node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inodes free", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Inodes Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 13, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(\n (\n (\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Inodes Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(node_boot_time_seconds{cluster=\"$cluster\", job=\"node-exporter\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Nodes", + "uid": "fa49a4706d07a042595b664c87fb33ea", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-nodes + namespace: monitoring +- apiVersion: v1 + data: + persistentvolumesusage.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used Space", + "refId": "A" + }, + { + "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free Space", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume Space Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used inodes", + "refId": "A" + }, + { + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": " Free inodes", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume inodes Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume inodes Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "PersistentVolumeClaim", + "multi": false, + "name": "volume", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-persistentvolumesusage + namespace: monitoring +- apiVersion: v1 + data: + pods.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "$datasource", + "enable": true, + "expr": "time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m]) > 0)", + "hide": false, + "iconColor": "rgba(215, 44, 44, 1)", + "name": "Restarts", + "showIn": 0, + "tags": [ + "restart" + ], + "type": "rows" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(container) (container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current: {{ container }}", + "refId": "A" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "refId": "B" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Limit: {{ container }}", + "refId": "C" + }, + { + "expr": "sum by(container) (container_memory_cache{job=\"kubelet\", namespace=\"$namespace\", pod=~\"$pod\", container=~\"$container\", container!=\"POD\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cache: {{ container }}", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (container) (irate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[4m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current: {{ container }}", + "refId": "A" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "refId": "B" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Limit: {{ container }}", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (pod) (irate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[4m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RX: {{ pod }}", + "refId": "A" + }, + { + "expr": "sort_desc(sum by (pod) (irate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[4m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "TX: {{ pod }}", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (container) (kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Restarts: {{ container }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Restarts Per Container", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Container", + "multi": false, + "name": "container", + "options": [ + + ], + "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Pods", + "uid": "ab4f13a9892a76a4d21ce8c2445bf4ea", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-pods + namespace: monitoring +- apiVersion: v1 + data: + proxy.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{job=\"kube-proxy\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "rate", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rules Sync Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rule Sync Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "rate", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Programming Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Programming Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Kube API Request Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Post Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Get Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{job=\"kube-proxy\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{job=\"kube-proxy\",instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{job=\"kube-proxy\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Proxy", + "uid": "632e265de029684c40b21cb76bca4f94", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-proxy + namespace: monitoring +- apiVersion: v1 + data: + scheduler.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{job=\"kube-scheduler\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} e2e", + "refId": "A" + }, + { + "expr": "sum(rate(scheduler_binding_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} binding", + "refId": "B" + }, + { + "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} scheduling algorithm", + "refId": "C" + }, + { + "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} volume", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scheduling Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} e2e", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} binding", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} scheduling algorithm", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} volume", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scheduling latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Kube API Request Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Post Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, { "collapse": false, "collapsed": false, @@ -9167,17 +16409,17 @@ items: "gridPos": { }, - "id": 2, + "id": 7, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, + "alignAsTable": "true", + "avg": false, + "current": "true", "max": false, "min": false, - "rightSide": true, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, @@ -9199,32 +16441,11 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by(container_name) (container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Current: {{ container_name }}", + "legendFormat": "{{verb}} {{url}}", "refId": "A" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "refId": "B" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "refId": "C" - }, - { - "expr": "sum by(container_name) (container_memory_cache{job=\"kubelet\", namespace=\"$namespace\", pod_name=~\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Cache: {{ container_name }}", - "refId": "D" } ], "thresholds": [ @@ -9232,7 +16453,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Get Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, @@ -9250,7 +16471,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -9258,7 +16479,7 @@ items: "show": true }, { - "format": "bytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -9292,14 +16513,14 @@ items: "gridPos": { }, - "id": 3, + "id": 8, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, + "alignAsTable": false, + "avg": false, + "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, "values": false @@ -9319,30 +16540,16 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"}[1m]))", + "expr": "process_resident_memory_bytes{job=\"kube-scheduler\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Current: {{ container_name }}", + "legendFormat": "{{instance}}", "refId": "A" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "refId": "B" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "refId": "C" } ], "thresholds": [ @@ -9350,7 +16557,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Memory", "tooltip": { "shared": false, "sort": 0, @@ -9368,36 +16575,23 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -9410,14 +16604,14 @@ items: "gridPos": { }, - "id": 4, + "id": 9, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, + "alignAsTable": false, + "avg": false, + "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, "values": false @@ -9437,23 +16631,16 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", + "expr": "rate(process_cpu_seconds_total{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "RX: {{ pod_name }}", + "legendFormat": "{{instance}}", "refId": "A" - }, - { - "expr": "sort_desc(sum by (pod_name) (rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "TX: {{ pod_name }}", - "refId": "B" } ], "thresholds": [ @@ -9461,7 +16648,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network I/O", + "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, @@ -9495,20 +16682,7 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -9521,14 +16695,14 @@ items: "gridPos": { }, - "id": 5, + "id": 10, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, + "alignAsTable": false, + "avg": false, + "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, "values": false @@ -9548,15 +16722,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max by (container) (kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", + "expr": "go_goroutines{job=\"kube-scheduler\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Restarts: {{ container }}", + "legendFormat": "{{instance}}", "refId": "A" } ], @@ -9565,7 +16739,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Total Restarts Per Container", + "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, @@ -9587,7 +16761,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -9595,7 +16769,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -9637,95 +16811,17 @@ items: "allValue": null, "current": { - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Pod", - "multi": false, - "name": "pod", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - }, "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": "Container", + "label": null, "multi": false, - "name": "container", + "name": "instance", "options": [ ], - "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", + "query": "label_values(process_cpu_seconds_total{job=\"kube-scheduler\"}, instance)", "refresh": 2, "regex": "", "sort": 0, @@ -9769,13 +16865,13 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Pods", - "uid": "ab4f13a9892a76a4d21ce8c2445bf4ea", + "title": "Kubernetes / Scheduler", + "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-pods + name: grafana-dashboard-scheduler namespace: monitoring - apiVersion: v1 data: @@ -9866,7 +16962,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m]))", + "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -9949,7 +17045,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}) / 1024^3", + "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -10032,7 +17128,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod_name=~\"$statefulset.*\"}[3m]))", + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "", diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index c977e450991afc4f4848a9f0ccd743d2ef7338ba..98719fbca5193a74fb3e9b13133bd8e43844efea 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -42,6 +42,12 @@ spec: - mountPath: /etc/grafana/provisioning/dashboards name: grafana-dashboards readOnly: false + - mountPath: /grafana-dashboard-definitions/0/apiserver + name: grafana-dashboard-apiserver + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/controller-manager + name: grafana-dashboard-controller-manager + readOnly: false - mountPath: /grafana-dashboard-definitions/0/k8s-cluster-rsrc-use name: grafana-dashboard-k8s-cluster-rsrc-use readOnly: false @@ -63,6 +69,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace name: grafana-dashboard-k8s-resources-workloads-namespace readOnly: false + - mountPath: /grafana-dashboard-definitions/0/kubelet + name: grafana-dashboard-kubelet + readOnly: false - mountPath: /grafana-dashboard-definitions/0/nodes name: grafana-dashboard-nodes readOnly: false @@ -72,6 +81,12 @@ spec: - mountPath: /grafana-dashboard-definitions/0/pods name: grafana-dashboard-pods readOnly: false + - mountPath: /grafana-dashboard-definitions/0/proxy + name: grafana-dashboard-proxy + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/scheduler + name: grafana-dashboard-scheduler + readOnly: false - mountPath: /grafana-dashboard-definitions/0/statefulset name: grafana-dashboard-statefulset readOnly: false @@ -90,6 +105,12 @@ spec: - configMap: name: grafana-dashboards name: grafana-dashboards + - configMap: + name: grafana-dashboard-apiserver + name: grafana-dashboard-apiserver + - configMap: + name: grafana-dashboard-controller-manager + name: grafana-dashboard-controller-manager - configMap: name: grafana-dashboard-k8s-cluster-rsrc-use name: grafana-dashboard-k8s-cluster-rsrc-use @@ -111,6 +132,9 @@ spec: - configMap: name: grafana-dashboard-k8s-resources-workloads-namespace name: grafana-dashboard-k8s-resources-workloads-namespace + - configMap: + name: grafana-dashboard-kubelet + name: grafana-dashboard-kubelet - configMap: name: grafana-dashboard-nodes name: grafana-dashboard-nodes @@ -120,6 +144,12 @@ spec: - configMap: name: grafana-dashboard-pods name: grafana-dashboard-pods + - configMap: + name: grafana-dashboard-proxy + name: grafana-dashboard-proxy + - configMap: + name: grafana-dashboard-scheduler + name: grafana-dashboard-scheduler - configMap: name: grafana-dashboard-statefulset name: grafana-dashboard-statefulset diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 92b131c9286e291dc8484d6b2f654fbe79f8ba43..1cb939cd71e8aee36d68d3cbc7bb756ac00c49f6 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -11,30 +11,44 @@ spec: - name: k8s.rules rules: - expr: | - sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace) + sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])) by (namespace) record: namespace:container_cpu_usage_seconds_total:sum_rate - expr: | - sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace) + sum by (namespace, pod, container) ( + rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m]) + ) + record: namespace_pod_container:container_cpu_usage_seconds_total:sum_rate + - expr: | + sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace) record: namespace:container_memory_usage_bytes:sum - expr: | - sum by (namespace, pod_name, container_name) ( - rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m]) + sum by (namespace, label_name) ( + sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])) by (namespace, pod) + * on (namespace, pod) + group_left(label_name) kube_pod_labels{job="kube-state-metrics"} ) - record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate + record: namespace:container_cpu_usage_seconds_total:sum_rate - expr: | - sum by(namespace) ( - kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} - * on (endpoint, instance, job, namespace, pod, service) - group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1) + sum by (namespace, label_name) ( + sum(container_memory_usage_bytes{job="kubelet",image!="", container!="POD"}) by (pod, namespace) + * on (namespace, pod) + group_left(label_name) kube_pod_labels{job="kube-state-metrics"} ) - record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum + record: namespace:container_memory_usage_bytes:sum - expr: | - sum by (namespace) ( - kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} - * on (endpoint, instance, job, namespace, pod, service) - group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1) + sum by (namespace, label_name) ( + sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod) + * on (namespace, pod) + group_left(label_name) kube_pod_labels{job="kube-state-metrics"} ) - record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum + record: namespace:kube_pod_container_resource_requests_memory_bytes:sum + - expr: | + sum by (namespace, label_name) ( + sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod) + * on (namespace, pod) + group_left(label_name) kube_pod_labels{job="kube-state-metrics"} + ) + record: namespace:kube_pod_container_resource_requests_cpu_cores:sum - expr: | sum( label_replace( @@ -71,67 +85,67 @@ spec: - name: kube-scheduler.rules rules: - expr: | - histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.99" - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.99" - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.99" - record: cluster_quantile:scheduler_binding_latency:histogram_quantile + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.9" - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.9" - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.9" - record: cluster_quantile:scheduler_binding_latency:histogram_quantile + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.5" - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.5" - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) labels: quantile: "0.5" - record: cluster_quantile:scheduler_binding_latency:histogram_quantile + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - name: kube-apiserver.rules rules: - expr: | - histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) labels: quantile: "0.99" - record: cluster_quantile:apiserver_request_latencies:histogram_quantile + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) labels: quantile: "0.9" - record: cluster_quantile:apiserver_request_latencies:histogram_quantile + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) labels: quantile: "0.5" - record: cluster_quantile:apiserver_request_latencies:histogram_quantile + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - name: node.rules rules: - expr: sum(min(kube_pod_info) by (node)) @@ -593,11 +607,11 @@ spec: tolerate node failure. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit expr: | - sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum) + sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum) / - sum(node:node_num_cpu:sum) + sum(kube_node_status_allocatable_cpu_cores) > - (count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum) + (count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores) for: 5m labels: severity: warning @@ -607,13 +621,13 @@ spec: tolerate node failure. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit expr: | - sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum) + sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum) / - sum(node_memory_MemTotal_bytes) + sum(kube_node_status_allocatable_memory_bytes) > - (count(node:node_num_cpu:sum)-1) + (count(kube_node_status_allocatable_memory_bytes)-1) / - count(node:node_num_cpu:sum) + count(kube_node_status_allocatable_memory_bytes) for: 5m labels: severity: warning @@ -624,7 +638,7 @@ spec: expr: | sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) / - sum(node:node_num_cpu:sum) + sum(kube_node_status_allocatable_cpu_cores) > 1.5 for: 5m labels: @@ -636,7 +650,7 @@ spec: expr: | sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) / - sum(node_memory_MemTotal_bytes{job="node-exporter"}) + sum(kube_node_status_allocatable_memory_bytes{job="node-exporter"}) > 1.5 for: 5m labels: @@ -657,12 +671,11 @@ spec: - alert: CPUThrottlingHigh annotations: message: '{{ printf "%0.0f" $value }}% throttling of CPU in namespace {{ $labels.namespace - }} for container {{ $labels.container_name }} in pod {{ $labels.pod_name - }}.' + }} for container {{ $labels.container }} in pod {{ $labels.pod }}.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh - expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!=\"\", - }[5m])) by (container_name, pod_name, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) - by (container_name, pod_name, namespace)\n > 25 \n" + expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", + }[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) + by (container, pod, namespace)\n > 25 \n" for: 15m labels: severity: warning @@ -769,7 +782,7 @@ spec: for {{ $labels.verb }} {{ $labels.resource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh expr: | - cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1 + cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1 for: 10m labels: severity: warning @@ -779,7 +792,7 @@ spec: for {{ $labels.verb }} {{ $labels.resource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh expr: | - cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4 + cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4 for: 10m labels: severity: critical @@ -788,9 +801,9 @@ spec: message: API server is returning errors for {{ $value }}% of requests. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) + sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3 + sum(rate(apiserver_request_total{job="apiserver"}[5m])) * 100 > 3 for: 10m labels: severity: critical @@ -799,9 +812,9 @@ spec: message: API server is returning errors for {{ $value }}% of requests. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) + sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1 + sum(rate(apiserver_request_total{job="apiserver"}[5m])) * 100 > 1 for: 10m labels: severity: warning @@ -811,9 +824,9 @@ spec: {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10 + sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10 for: 10m labels: severity: critical @@ -823,9 +836,9 @@ spec: {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5 + sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5 for: 10m labels: severity: warning