diff --git a/README.md b/README.md index 2ca8757ace60a38f0e287296f7c2e5cd19ee9a39..50659f951e20f0ec3d90e939e27411a98bd76fc0 100644 --- a/README.md +++ b/README.md @@ -573,34 +573,34 @@ You can define ServiceMonitor resources in your `jsonnet` spec. See the snippet ```jsonnet local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { _config+:: { - namespace: 'monitoring', - prometheus+:: { - namespaces+: ['my-namespace', 'my-second-namespace'], - } - }, + namespace: 'monitoring', prometheus+:: { - serviceMonitorMyNamespace: { - apiVersion: 'monitoring.coreos.com/v1', - kind: 'ServiceMonitor', - metadata: { - name: 'my-servicemonitor', - namespace: 'my-namespace', + namespaces+: ['my-namespace', 'my-second-namespace'], + }, + }, + prometheus+:: { + serviceMonitorMyNamespace: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'my-servicemonitor', + namespace: 'my-namespace', + }, + spec: { + jobLabel: 'app', + endpoints: [ + { + port: 'http-metrics', }, - spec: { - jobLabel: 'app', - endpoints: [ - { - port: 'http-metrics', - }, - ], - selector: { - matchLabels: { - 'app': 'myapp', - }, - }, + ], + selector: { + matchLabels: { + app: 'myapp', }, }, - }, + }, + }, + }, }; diff --git a/examples/additional-namespaces-servicemonitor.jsonnet b/examples/additional-namespaces-servicemonitor.jsonnet index 0262b4e060cc093d2d26f55a249b40c2255f0432..0f3add96d462392f903fbf64276d3c42f822f459 100644 --- a/examples/additional-namespaces-servicemonitor.jsonnet +++ b/examples/additional-namespaces-servicemonitor.jsonnet @@ -1,33 +1,33 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { _config+:: { - namespace: 'monitoring', - prometheus+:: { - namespaces+: ['my-namespace', 'my-second-namespace'], - } - }, + namespace: 'monitoring', prometheus+:: { - serviceMonitorMyNamespace: { - apiVersion: 'monitoring.coreos.com/v1', - kind: 'ServiceMonitor', - metadata: { - name: 'my-servicemonitor', - namespace: 'my-namespace', 
+ namespaces+: ['my-namespace', 'my-second-namespace'], + }, + }, + prometheus+:: { + serviceMonitorMyNamespace: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'ServiceMonitor', + metadata: { + name: 'my-servicemonitor', + namespace: 'my-namespace', + }, + spec: { + jobLabel: 'app', + endpoints: [ + { + port: 'http-metrics', }, - spec: { - jobLabel: 'app', - endpoints: [ - { - port: 'http-metrics', - }, - ], - selector: { - matchLabels: { - 'app': 'myapp', - }, - }, + ], + selector: { + matchLabels: { + app: 'myapp', }, }, - }, + }, + }, + }, }; diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 5d054e2b0171308b64d49a743e91ee6ddeffb5a1..b64313fe5aa35f19f486d2abea6102e40f738f07 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -27,7 +27,7 @@ "subdir": "" } }, - "version": "15ddfa20a6921ffbd43172eb54f6bdc1bcf8d3d3" + "version": "a08a6754d527eae13c553bd7565d5f4e9e9f3c0e" }, { "name": "grafonnet", diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 1065be04eb84f0d0bf83958189d5b1e7a4d297b3..418414040b3475fa0132132f92addbf889d605d7 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -1238,7 +1238,7 @@ items: "query": "label_values(apiserver_request_total{job=\"apiserver\"}, instance)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -1289,7 +1289,7 @@ items: namespace: monitoring - apiVersion: v1 data: - controller-manager.json: |- + cluster-total.json: |- { "__inputs": [ @@ -1299,10 +1299,18 @@ items: ], "annotations": { "list": [ - + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, @@ -1310,199 +1318,783 @@ items: "links": [ ], - "refresh": "", - 
"rows": [ + "panels": [ { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "instant": null, + 
"intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "columns": [ + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + } + ], + "datasource": "prometheus", + "fill": 1, + "fontSize": "90%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" + ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ - }, - "id": 2, - "interval": null, - "links": [ + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Current Bandwidth Received", + "colorMode": null, + "colors": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - 
"postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Bandwidth Transmitted", + "colorMode": null, + "colors": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(up{job=\"kube-controller-manager\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + ], - "thresholds": "", - "title": "Up", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Received", + "colorMode": null, + "colors": [ + ], - "valueName": "min" + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" }, { - "aliasColors": { + "alias": "Average Bandwidth Transmitted", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ - }, - "id": 3, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + 
"type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ ], - "spaceLength": 10, - "span": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", - "refId": "A" - } + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Work Queue Add Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of 
Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?orgId=1&refresh=30s&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": 
"sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "title": "Current Status", + "transform": "table", + "type": "table" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 6, + "panels": [ + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 7, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A" + } + ], + "title": "Average Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + 
"threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 8, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A" + } + ], + "title": "Average Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Average Bandwidth", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 9, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth History", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + 
"span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { 
+ "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 12, "panels": [ { "aliasColors": { @@ -1511,29 +2103,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 31 }, - "id": 4, + "id": 13, "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -1543,16 +2142,17 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + 
"step": 10 } ], "thresholds": [ @@ -1560,10 +2160,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Work Queue Depth", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1578,7 +2178,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -1586,7 +2186,7 @@ items: "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -1594,20 +2194,7 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -1615,29 +2202,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 40 }, - "id": 5, + "id": 14, "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, "total": false, - "values": "true" + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -1647,16 +2241,17 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by 
(instance, name, le))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -1664,10 +2259,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Work Queue Latency", + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1682,19 +2277,19 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -1703,14 +2298,21 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 15, "panels": [ { "aliasColors": { @@ -1719,29 +2321,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 50 }, - "id": 6, + "id": 16, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": 
[ ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -1751,37 +2360,17 @@ items: ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -1789,10 +2378,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Kube API Request Rate", + "title": "Rate of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1807,19 +2396,19 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + 
"min": 0, "show": true } ] @@ -1831,29 +2420,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 24, + "x": 0, + "y": 59 }, - "id": 7, + "id": 17, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -1863,16 +2459,17 @@ items: ], "spaceLength": 10, - "span": 8, - "stack": false, + "span": 24, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{namespace}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -1880,10 +2477,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Post Request Latency 99th Quantile", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1898,7 +2495,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -1906,7 +2503,7 @@ items: "show": true }, { - 
"format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -1919,33 +2516,274 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" - }, + } + ], + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Networking / Cluster", + 
"uid": "ff635a025bcfea7bc3dd4f508990a3e9", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-cluster-total + namespace: monitoring +- apiVersion: v1 + data: + controller-manager.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ { "collapse": false, "collapsed": false, "panels": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], "datasource": "$datasource", - "fill": 1, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { }, - "id": 8, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{job=\"kube-controller-manager\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + 
"value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, "min": false, "rightSide": "true", "show": "true", @@ -1967,15 +2805,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", + "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", + "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], @@ -1984,7 +2822,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Get Request Latency 99th Quantile", + "title": "Work Queue Add Rate", "tooltip": { "shared": false, "sort": 0, @@ -2002,19 +2840,19 @@ items: }, "yaxes": [ { - "format": "s", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "s", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -2044,17 +2882,17 @@ items: "gridPos": { }, - "id": 9, + "id": 4, "legend": { - "alignAsTable": false, + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "rightSide": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, @@ -2071,15 +2909,15 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}", + "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], @@ -2088,7 +2926,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory", + "title": "Work Queue Depth", "tooltip": { "shared": false, "sort": 0, @@ -2106,23 +2944,36 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -2135,17 +2986,17 @@ items: "gridPos": { }, - "id": 10, + "id": 5, "legend": { - "alignAsTable": false, + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "rightSide": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, @@ -2162,15 +3013,15 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{name}}", 
"refId": "A" } ], @@ -2179,7 +3030,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU usage", + "title": "Work Queue Latency", "tooltip": { "shared": false, "sort": 0, @@ -2197,23 +3048,36 @@ items: }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -2226,7 +3090,7 @@ items: "gridPos": { }, - "id": 11, + "id": 6, "legend": { "alignAsTable": false, "avg": false, @@ -2258,11 +3122,32 @@ items: "steppedLine": false, "targets": [ { - "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}", + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "2xx", "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], "thresholds": [ @@ -2270,7 +3155,7 @@ items: ], "timeFrom": null, 
"timeShift": null, - "title": "Goroutines", + "title": "Kube API Request Rate", "tooltip": { "shared": false, "sort": 0, @@ -2288,7 +3173,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -2296,7 +3181,7 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -2304,130 +3189,9 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Controller Manager", - "uid": "72e0e05bef5099e5f049b05fdc429ed4", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-controller-manager - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-cluster.json: |- - { - "annotations": { - "list": [ - - ] - 
}, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ - { - "aliasColors": { + { + "aliasColors": { }, "bars": false, @@ -2435,13 +3199,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 1, + "gridPos": { + + }, + "id": 7, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2451,37 +3219,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 8, "stack": false, "steppedLine": false, "targets": [ { - "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "Post Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2493,7 +3264,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -2501,15 +3272,28 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] - }, + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -2519,53 +3303,60 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 2, + "gridPos": { + + }, + "id": 8, "legend": { + "alignAsTable": "true", "avg": false, - "current": false, + "current": "true", "max": false, "min": false, - "show": true, + "rightSide": "true", + "show": "true", "total": false, - "values": false + "values": "true" }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Requests Commitment", + "title": "Get Request Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2577,7 +3368,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -2585,15 +3376,28 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": 
null, - "show": false + "min": 0, + "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -2603,13 +3407,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 3, + "gridPos": { + + }, + "id": 9, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2619,37 +3427,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "expr": "process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Limits Commitment", + "title": "Memory", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2661,20 +3472,20 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -2687,13 +3498,17 @@ items: "dashes": false, "datasource": 
"$datasource", "fill": 1, - "format": "percentunit", - "id": 4, + "gridPos": { + + }, + "id": 10, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2703,37 +3518,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Memory Utilisation", + "title": "CPU usage", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2757,8 +3575,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, @@ -2771,13 +3589,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 5, + "gridPos": { + + }, + "id": 11, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -2787,37 +3609,40 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 2, + "span": 
4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", - "instant": true, "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Memory Requests Commitment", + "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -2833,7 +3658,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -2842,10 +3667,131 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + 
"refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Controller Manager", + "uid": "72e0e05bef5099e5f049b05fdc429ed4", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-controller-manager + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-cluster.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ { "aliasColors": { @@ -2856,7 +3802,7 @@ items: "datasource": "$datasource", "fill": 1, "format": "percentunit", - "id": 6, + "id": 1, "legend": { "avg": false, "current": false, @@ -2885,7 +3831,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -2895,7 +3841,7 @@ items: "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "Memory Limits Commitment", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -2929,19 +3875,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -2950,8 +3884,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "format": "percentunit", + "id": 2, "legend": { "avg": false, "current": false, @@ 
-2962,7 +3897,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -2975,31 +3910,28 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 2, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU Requests Commitment", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -3027,19 +3959,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -3049,7 +3969,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 8, + "format": "percentunit", + "id": 3, "legend": { "avg": false, "current": false, @@ -3073,254 +3994,280 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 2, "stack": false, "steppedLine": false, - "styles": [ + "targets": [ { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + 
"timeShift": null, + "title": "CPU Limits Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workloads", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": true, - "linkTooltip": "Drill down to workloads", - "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": 
"", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Namespace", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", - "pattern": "namespace", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, 
"targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", - "format": "table", + "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "format": "time_series", "instant": true, "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 + 
"refId": "A" } ], - "thresholds": [ - - ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Memory Utilisation", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Requests Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Limits Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -3353,8 +4300,8 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "CPU Quota", + "showTitle": false, + "title": "Headlines", "titleSize": "h6" }, { @@ -3370,7 +4317,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, + "id": 7, "legend": { "avg": false, "current": false, @@ -3399,7 +4346,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -3412,7 +4359,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", 
+ "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -3430,7 +4377,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -3452,7 +4399,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory", + "title": "CPU", "titleSize": "h6" }, { @@ -3468,7 +4415,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 10, + "id": 8, "legend": { "avg": false, "current": false, @@ -3539,7 +4486,7 @@ items: "unit": "short" }, { - "alias": "Memory Usage", + "alias": "CPU Usage", "colorMode": null, "colors": [ @@ -3554,10 +4501,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests", + "alias": "CPU Requests", "colorMode": null, "colors": [ @@ -3572,10 +4519,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests %", + "alias": "CPU Requests %", "colorMode": null, "colors": [ @@ -3593,7 +4540,7 @@ items: "unit": "percentunit" }, { - "alias": "Memory Limits", + "alias": "CPU Limits", "colorMode": null, "colors": [ @@ -3608,10 +4555,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Limits %", + "alias": "CPU Limits %", "colorMode": null, "colors": [ @@ -3682,7 +4629,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3691,7 +4638,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3700,7 +4647,7 @@ items: "step": 10 }, { - 
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3709,7 +4656,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3718,7 +4665,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -3732,7 +4679,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Requests by Namespace", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, @@ -3773,124 +4720,15 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Requests", + "title": "CPU Quota", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - 
"text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(node_cpu_seconds_total, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Cluster", - "uid": "efa86fd1d0c121a26444b636a3f509a8", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-cluster - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-namespace.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, "bars": false, @@ -3898,7 +4736,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 1, + "id": 9, "legend": { "avg": false, "current": false, @@ -3927,10 +4765,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } @@ -3940,7 +4778,7 @@ items: ], 
"timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 0, @@ -3958,7 +4796,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -3980,7 +4818,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Memory", "titleSize": "h6" }, { @@ -3996,7 +4834,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "id": 10, "legend": { "avg": false, "current": false, @@ -4031,7 +4869,43 @@ items: "type": "hidden" }, { - "alias": "CPU Usage", + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to workloads", + "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", "colorMode": null, "colors": [ @@ -4041,15 +4915,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #A", + "pattern": "Value #C", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests", + "alias": "Memory Requests", "colorMode": null, "colors": [ @@ -4059,15 +4933,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #B", + 
"pattern": "Value #D", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests %", + "alias": "Memory Requests %", "colorMode": null, "colors": [ @@ -4077,7 +4951,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #E", "thresholds": [ ], @@ -4085,7 +4959,7 @@ items: "unit": "percentunit" }, { - "alias": "CPU Limits", + "alias": "Memory Limits", "colorMode": null, "colors": [ @@ -4095,15 +4969,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #F", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Limits %", + "alias": "Memory Limits %", "colorMode": null, "colors": [ @@ -4113,7 +4987,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #G", "thresholds": [ ], @@ -4121,7 +4995,7 @@ items: "unit": "percentunit" }, { - "alias": "Pod", + "alias": "Namespace", "colorMode": null, "colors": [ @@ -4129,9 +5003,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", "thresholds": [ ], @@ -4156,7 +5030,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4165,7 +5039,7 @@ items: "step": 10 }, { - 
"expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4174,7 +5048,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4183,7 +5057,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4192,13 +5066,31 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / 
sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 } ], "thresholds": [ @@ -4206,7 +5098,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Requests by Namespace", "tooltip": { "shared": false, "sort": 0, @@ -4247,7 +5139,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Memory Requests", "titleSize": "h6" }, { @@ -4262,8 +5154,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 3, + "fill": 1, + "id": 11, "legend": { "avg": false, "current": false, @@ -4274,7 +5166,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -4288,115 +5180,17 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, - "targets": [ - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage (w/o cache)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ + "styles": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": 
true, - "title": "Memory Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Memory Usage", + "alias": "Current Receive Bandwidth", "colorMode": null, "colors": [ @@ -4411,10 +5205,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "Bps" }, { - "alias": "Memory Requests", + "alias": "Current Transmit Bandwidth", "colorMode": null, "colors": [ @@ -4429,10 +5223,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "Bps" }, { - "alias": "Memory Requests %", + "alias": "Rate of Received Packets", "colorMode": null, "colors": [ @@ -4447,10 +5241,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "pps" }, { - "alias": "Memory Limits", + "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ @@ -4465,10 +5259,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "pps" }, { - "alias": "Memory Limits %", + "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ @@ -4483,10 +5277,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "pps" }, { - "alias": "Memory Usage (RSS)", + "alias": "Rate of Transmitted Packets Dropped", "colorMode": null, "colors": [ @@ -4501,46 +5295,10 @@ items: ], "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage 
(Cache)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Swap)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" + "unit": "pps" }, { - "alias": "Pod", + "alias": "Namespace", "colorMode": null, "colors": [ @@ -4548,9 +5306,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", "thresholds": [ ], @@ -4575,7 +5333,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4584,7 +5342,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4593,7 +5351,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", 
namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4602,7 +5360,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4611,7 +5369,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -4620,31 +5378,13 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 - }, - { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 } ], "thresholds": [ @@ -4652,7 +5392,7 @@ items: ], 
"timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 0, @@ -4693,145 +5433,107 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Network", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Namespace (Pods)", - "uid": "85a562078cdf77779eaa1add43ccec1e", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-namespace - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-node.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "refresh": "10s", - "rows": [ + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, { "collapse": false, "height": "250px", @@ -4845,7 +5547,7 @@ items: "dashes": false, 
"datasource": "$datasource", "fill": 10, - "id": 1, + "id": 13, "legend": { "avg": false, "current": false, @@ -4874,10 +5576,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } @@ -4887,7 +5589,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -4905,7 +5607,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -4927,7 +5629,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Network", "titleSize": "h6" }, { @@ -4942,8 +5644,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 2, + "fill": 10, + "id": 14, "legend": { "avg": false, "current": false, @@ -4954,7 +5656,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -4968,183 +5670,211 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ + "expr": "avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill 
down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Namespace: Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - }, + ] + }, + "yaxes": [ { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ + ], + 
"nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "number", - "unit": "short" - }, + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ + "expr": "avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Namespace: Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "percentunit" - }, + ] + }, + "yaxes": [ { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "", - "colorMode": null, - "colors": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - 
"thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, + "expr": 
"sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "{{namespace}}", + "legendLink": null, "step": 10 } ], @@ -5153,14 +5883,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -5172,7 +5901,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -5194,7 +5923,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Network", "titleSize": "h6" }, { @@ -5210,7 +5939,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 17, "legend": { "avg": false, "current": false, @@ -5239,10 +5968,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\", container!=\"\"}) by (pod)", + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } @@ -5252,7 +5981,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 0, @@ -5270,7 +5999,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -5292,7 +6021,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Network", "titleSize": "h6" }, { @@ -5307,8 +6036,8 @@ items: "dashLength": 10, "dashes": false, 
"datasource": "$datasource", - "fill": 1, - "id": 4, + "fill": 10, + "id": 18, "legend": { "avg": false, "current": false, @@ -5319,7 +6048,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -5333,280 +6062,30 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Usage (RSS)", - 
"colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Swap)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / 
sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 - } - ], - "thresholds": [ + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, 
"mode": "time", @@ -5618,7 +6097,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -5640,20 +6119,118 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Network", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + 
"showTitle": true, + "title": "Network", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", "value": "Prometheus" }, "hide": 0, @@ -5682,7 +6259,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info, cluster)", + "query": "label_values(node_cpu_seconds_total, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -5696,29 +6273,37 @@ items: }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "text": "prod", - "value": "prod" + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", - "hide": 0, + "datasource": "prometheus", + "hide": 2, "includeAll": false, - "label": "node", + "label": null, "multi": false, - "name": "node", + "name": "interval", "options": [ - + { + "selected": true, + "text": "4h", + "value": "4h" + } ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)", - "refresh": 1, + "query": "4h", + "refresh": 2, "regex": "", - "sort": 2, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false } ] @@ -5753,17 +6338,17 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Compute Resources / Node (Pods)", - "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-k8s-resources-node + name: grafana-dashboard-k8s-resources-cluster namespace: monitoring - apiVersion: v1 data: - k8s-resources-pod.json: |- + k8s-resources-namespace.json: |- { "annotations": { "list": [ @@ -5821,10 +6406,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by 
(container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{container}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -6015,17 +6600,17 @@ items: "unit": "percentunit" }, { - "alias": "Container", + "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": false, + "link": true, "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "container", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -6050,7 +6635,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6059,7 +6644,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6068,7 +6653,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": 
"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6077,7 +6662,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6086,7 +6671,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6186,26 +6771,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}} (RSS)", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}} (Cache)", - "legendLink": null, - "step": 10 - }, - { - "expr": 
"sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{container}} (Swap)", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -6215,7 +6784,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, "sort": 0, @@ -6450,17 +7019,17 @@ items: "unit": "bytes" }, { - "alias": "Container", + "alias": "Pod", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": false, + "link": true, "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "container", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -6485,7 +7054,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6494,7 +7063,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6503,7 +7072,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", 
pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6512,7 +7081,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6521,7 +7090,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6530,7 +7099,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6539,7 +7108,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": 
"sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6548,7 +7117,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6605,274 +7174,13 @@ items: "showTitle": true, "title": "Memory Quota", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - 
"includeAll": false, - "label": "pod", - "multi": false, - "name": "pod", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Pod", - "uid": "6581e46e4e5c7ba40a07646395ef7b23", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-pod - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-workload.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", 
namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, "bars": false, @@ -6880,7 +7188,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "id": 5, "legend": { "avg": false, "current": false, @@ -6915,7 +7223,7 @@ items: "type": "hidden" }, { - "alias": "CPU Usage", + "alias": "Current Receive Bandwidth", "colorMode": null, "colors": [ @@ -6930,10 +7238,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "Bps" }, { - "alias": "CPU Requests", + "alias": "Current Transmit Bandwidth", "colorMode": null, "colors": [ @@ -6948,10 +7256,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "Bps" }, { - "alias": "CPU Requests %", + "alias": "Rate of Received Packets", "colorMode": null, "colors": [ @@ -6966,10 +7274,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "pps" }, { - "alias": "CPU Limits", + "alias": "Rate of Transmitted Packets", "colorMode": null, "colors": [ @@ -6984,10 +7292,10 @@ items: ], "type": 
"number", - "unit": "short" + "unit": "pps" }, { - "alias": "CPU Limits %", + "alias": "Rate of Received Packets Dropped", "colorMode": null, "colors": [ @@ -7002,7 +7310,25 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" }, { "alias": "Pod", @@ -7013,7 +7339,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down", + "linkTooltip": "Drill down to pods", "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ @@ -7040,7 +7366,7 @@ items: ], "targets": [ { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7049,7 +7375,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ 
-7058,7 +7384,7 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7067,7 +7393,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7076,21 +7402,30 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", 
workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - } - ], + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Current Network Usage", "tooltip": { "shared": false, "sort": 0, @@ -7131,7 +7466,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Network", "titleSize": "h6" }, { @@ -7147,7 +7482,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 6, "legend": { "avg": false, "current": false, @@ -7176,7 +7511,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -7189,7 +7524,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Receive Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -7207,7 +7542,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -7229,7 +7564,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Network", 
"titleSize": "h6" }, { @@ -7244,8 +7579,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 4, + "fill": 10, + "id": 7, "legend": { "avg": false, "current": false, @@ -7256,7 +7591,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -7270,183 +7605,15 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - 
"unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, 
workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -7455,14 +7622,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -7474,7 +7640,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", 
"label": null, "logBase": 1, "max": null, @@ -7496,199 +7662,107 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Network", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "workload", - "multi": false, - "name": "workload", - "options": [ - - ], - "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - 
"useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "type", - "multi": false, - "name": "type", - "options": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Workload", - "uid": "a164a7f0339f99e89cea5cb47e9be617", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-workload - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-workloads-namespace.json: |- - { - "annotations": { - "list": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ] - }, - "editable": true, 
- "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "refresh": "10s", - "rows": [ + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, { "collapse": false, "height": "250px", @@ -7702,7 +7776,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 1, + "id": 9, "legend": { "avg": false, "current": false, @@ -7731,10 +7805,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -7744,7 +7818,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 0, @@ -7762,7 +7836,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -7784,7 +7858,7 @@ items: "repeatIteration": null, "repeatRowId": null, 
"showTitle": true, - "title": "CPU Usage", + "title": "Network", "titleSize": "h6" }, { @@ -7799,8 +7873,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 2, + "fill": 10, + "id": 10, "legend": { "avg": false, "current": false, @@ -7811,7 +7885,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -7825,228 +7899,15 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, - "styles": [ + "targets": [ { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Running Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - 
"pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Workload", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workload Type", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", 
- "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", 
namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "F", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -8055,14 +7916,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -8074,7 +7934,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -8096,7 +7956,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Network", "titleSize": "h6" }, { @@ -8112,7 +7972,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 11, "legend": { "avg": false, "current": false, @@ -8141,10 +8001,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval])) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -8154,7 +8014,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -8172,7 +8032,7 @@ items: }, "yaxes": [ 
{ - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -8194,34 +8054,205 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Network", "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + 
"multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-namespace + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-node.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -8235,71 +8266,133 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "Running Pods", - "colorMode": null, - "colors": [ + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "time_series", + "intervalFactor": 
2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - }, + ] + }, + "yaxes": [ { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", 
+ "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #A", "thresholds": [ ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests %", + "alias": "CPU Requests", "colorMode": null, "colors": [ @@ -8309,15 +8402,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #B", "thresholds": [ ], "type": "number", - "unit": "percentunit" + "unit": "short" }, { - "alias": "Memory Limits", + "alias": "CPU Requests %", "colorMode": null, "colors": [ @@ -8327,15 +8420,15 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #C", "thresholds": [ ], "type": "number", - "unit": "bytes" + "unit": "percentunit" }, { - "alias": "Memory Limits %", + "alias": "CPU Limits", "colorMode": null, "colors": [ @@ -8345,33 +8438,33 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #F", + "pattern": "Value #D", "thresholds": [ ], "type": "number", - "unit": "percentunit" + "unit": "short" }, { - "alias": "Workload", + "alias": "CPU Limits %", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": true, + "link": false, "linkTooltip": "Drill down", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", + "linkUrl": "", + "pattern": "Value #E", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "percentunit" }, { - "alias": "Workload Type", + "alias": "Pod", "colorMode": null, "colors": [ @@ -8381,7 +8474,7 @@ items: "link": false, "linkTooltip": "Drill down", "linkUrl": "", - 
"pattern": "workload_type", + "pattern": "pod", "thresholds": [ ], @@ -8406,7 +8499,7 @@ items: ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8415,7 +8508,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8424,7 +8517,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8433,7 +8526,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n 
group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8442,22 +8535,13 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 } ], "thresholds": [ @@ -8465,7 +8549,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, @@ -8506,673 +8590,8603 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "CPU Quota", "titleSize": "h6" - } - ], - 
"schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\", container!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + 
"legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Compute Resources / Namespace (Workloads)", - "uid": "a87fb0d919ec0ea5f6543124e16c42a5", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-workloads-namespace - namespace: monitoring -- apiVersion: v1 - data: - kubelet.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" }, - "gridPos": { + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] }, - "id": 2, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "yaxes": [ { - "name": "value to text", - "value": 1 + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": 
"connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(up{job=\"kubelet\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Up", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "aliasColors": { }, - "id": 3, - "interval": null, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, 
+ "steppedLine": false, + "styles": [ { - "name": "value to text", - "value": 1 + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / 
sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { 
+ "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "node", + "multi": false, + "name": "node", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Node (Pods)", + "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-node + namespace: 
monitoring +- apiVersion: v1 + data: + k8s-resources-pod.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value 
#E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", 
pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (RSS)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", 
pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (Cache)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (Swap)", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory 
Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + 
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + 
"aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + 
"options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "pod", + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": 
"Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-pod + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-workload.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + 
"linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": 
{ + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": 
"bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", 
+ "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, 
+ "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + 
"timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { 
+ "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on 
(namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) 
mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": 
null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": 
"Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Pod: Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": 
null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Pod: Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + 
"id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval])\n* on 
(namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 
+ } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + 
"value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "workload", + "multi": false, + "name": "workload", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "type", + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Workload", + "uid": "a164a7f0339f99e89cea5cb47e9be617", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-workload + namespace: monitoring 
+- apiVersion: v1 + data: + k8s-resources-workloads-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + 
"titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by 
(workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n 
kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + 
"format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": 
false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", 
+ "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 
10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + 
"avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": 
"number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + 
"instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Workload: Received", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": 
"null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Workload: Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format":
"time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, +
"sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, +
"min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + 
"tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "prometheus", + "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-workloads-namespace + namespace: monitoring +- apiVersion: v1 + data: + kubelet.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + 
+ }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Running Pods", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Running Container", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + 
"rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Actual Volume Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 6, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": 
"", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Desired Volume Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", "to": "null" } ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"sum(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Config Error Count", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase":
1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation Error Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + 
"fill": 1, + "gridPos": { + + }, + "id": 10, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation duration 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" 
+ }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 12, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": 
false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 13, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, +
"span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 14, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + 
"timeShift": null, + "title": "Storage Operation Error Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 15, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "hideEmpty": "true", + "hideZero": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": true, + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Duration 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": 
"graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 16, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager operation rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, 
+ "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 17, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager 99th quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Pod lifecycle event generator", + "fill": 1, + 
"gridPos": { + + }, + "id": 18, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + }, - "tableColumn": "", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 19, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ 
{ - "expr": "sum(kubelet_running_pod_count{job=\"kubelet\", instance=~\"$instance\"})", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "", - "title": "Running Pods", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist interval", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - "id": 4, - "interval": null, + "id": 20, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + 
"values": "true" + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "range to text", - "value": 2 + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "from": "null", - "text": "N/A", - "to": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + 
"gridPos": { + + }, + "id": 21, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false }, - "tableColumn": "", + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(kubelet_running_container_count{job=\"kubelet\", instance=~\"$instance\"})", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "2xx", "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], - "thresholds": "", - "title": "Running Container", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "RPC Rate", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": 
[ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - "id": 5, - "interval": null, + "id": 22, + "legend": { + "alignAsTable": "true", + "avg": false, + "current": "true", + "max": false, + "min": false, + "rightSide": "true", + "show": "true", + "total": false, + "values": "true" + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + 
"spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(volume_manager_total_volumes{job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{verb}} {{url}}", "refId": "A" } ], - "thresholds": "", - "title": "Actual Volume Count", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Request duration 99th quantile", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - 
"id": 6, - "interval": null, + "id": 23, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(volume_manager_total_volumes{job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "", - "title": "Desired Volume Count", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + 
"format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "valueName": "min" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "gridPos": { }, - "id": 7, - "interval": null, + "id": 24, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_node_config_error{job=\"kubelet\", instance=~\"$instance\"}[5m]))", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "", - 
"title": "Config Error Count", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, { "aliasColors": { @@ -9185,17 +17199,17 @@ items: "gridPos": { }, - "id": 8, + "id": 25, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, "linewidth": 1, @@ -9212,265 +17226,983 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" + "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + 
"title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(kubelet_runtime_operations{cluster=\"$cluster\", job=\"kubelet\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + 
"1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Kubelet", + "uid": "3138fa155d5915769fbded898ac09fd9", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-kubelet + namespace: monitoring +- apiVersion: v1 + data: + namespace-by-pod.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + 
}, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 } ], - "thresholds": [ + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" ], - "timeFrom": null, - "timeShift": null, - "title": "Operation Rate", - "tooltip": { - "shared": false, - "sort": 
0, - "value_type": "individual" + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace", + "unit": "Bps" }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "mappings": [ + + ], + "override": { - ] }, - "yaxes": [ + "thresholds": [ { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-green", + "index": 0, + "value": null }, { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 } - ] + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [ + { + "text": "", + "value": "" }, { - "aliasColors": { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + } + ], + "datasource": "prometheus", + "fill": 1, + "fontSize": "100%", + 
"gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ - }, - "id": 9, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Bandwidth Received", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(kubelet_runtime_operations_errors_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" - } + "type": "number", + "unit": "Bps" + }, + { + "alias": "Bandwidth Transmitted", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", "thresholds": [ ], - 
"timeFrom": null, - "timeShift": null, - "title": "Operation Error Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + "type": "number", + "unit": "Bps" + }, { - "aliasColors": { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ - }, - "id": 10, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, 
sum(rate(kubelet_runtime_operations_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" - } + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Operation duration 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": 
true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 } + ], + "title": "Current Status", + "transform": "table", + "type": "table" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 6, + "panels": [ + ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Bandwidth", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 7, + "legend": 
{ + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + 
"seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 9, "panels": [ { "aliasColors": { @@ -9479,29 +18211,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 30 }, - "id": 11, + "id": 10, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -9511,23 +18250,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(kubelet_pod_start_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} pod", - "refId": "A" - }, - { - "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} worker", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -9535,10 +18268,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pod Start Rate", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -9553,7 +18286,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -9561,7 +18294,7 @@ items: "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -9577,29 +18310,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 30 }, - "id": 12, + "id": 11, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ 
-9609,23 +18349,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} pod", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} worker", - "refId": "B" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -9633,10 +18367,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pod Start Duration", + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -9651,7 +18385,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -9659,7 +18393,7 @@ items: "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -9672,14 +18406,21 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 12, "panels": [ { "aliasColors": { @@ -9688,31 +18429,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + 
"fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 40 }, "id": 13, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", - "hideEmpty": "true", - "hideZero": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -9722,16 +18468,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_duration_seconds_count{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -9739,10 +18486,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Storage Operation Rate", + "title": "Rate of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -9757,7 +18504,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -9765,7 +18512,7 @@ items: "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -9781,31 +18528,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, 
+ "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 40 }, "id": 14, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", - "hideEmpty": "true", - "hideZero": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -9815,16 +18567,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_errors_total{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -9832,10 +18585,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Storage Operation Error Rate", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -9850,7 +18603,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -9858,7 +18611,7 @@ items: "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -9871,315 +18624,983 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": 
null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" - }, + } + ], + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + 
"tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Networking / Namespace (Pods)", + "uid": "8b7a8b326d7a6f1f04244066368c67af", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-namespace-by-pod + namespace: monitoring +- apiVersion: v1 + data: + namespace-by-workload.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": 
"sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "columns": [ + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + }, + { + "text": "", + "value": "" + } + ], + "datasource": "prometheus", + 
"fill": 1, + "fontSize": "90%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Current Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": 
"Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, { - "aliasColors": { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ - }, - "id": 15, - "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "hideEmpty": "true", - "hideZero": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": true, - "total": false, - "values": "true" - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" - } + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", "thresholds": [ ], - "timeFrom": 
null, - "timeShift": null, - "title": "Storage Operation Duration 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": 
"sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + 
"legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "title": "Current Status", + "transform": "table", + "type": "table" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 6, "panels": [ { "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 20 }, - "id": 16, + "id": 7, + "interval": null, "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", "targets": [ { - "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": 
"sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{operation_type}}", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", "refId": "A" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Cgroup manager operation rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + "title": "Average Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" }, { "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 20 }, - "id": 17, + "id": 8, + "interval": null, "legend": { - "alignAsTable": "true", - "avg": false, - "current": "true", - "max": false, - "min": false, - "rightSide": "true", - "show": "true", - "total": false, - "values": "true" + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - 
"spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{workload}}", "refId": "A" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Cgroup manager 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + "title": "Average Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Average Bandwidth", "titleSize": "h6", "type": "row" }, { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 9, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth HIstory", + "titleSize": "h6", + "type": "row" + 
}, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": 
false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 12, "panels": [ { "aliasColors": { @@ -10188,30 +19609,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "description": "Pod lifecycle event generator", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 40 }, - "id": 18, + "id": 13, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": 
"true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10221,16 +19648,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10238,10 +19666,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist rate", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10256,7 +19684,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10264,7 +19692,7 @@ items: "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10280,29 +19708,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 40 }, - "id": 19, + "id": 14, 
"legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10312,16 +19747,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10329,10 +19765,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist interval", + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10347,7 +19783,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10355,7 +19791,7 @@ items: "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10368,14 +19804,21 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": 
"Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 15, "panels": [ { "aliasColors": { @@ -10384,29 +19827,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 41 }, - "id": 20, + "id": 16, "legend": { - "alignAsTable": "true", + "alignAsTable": false, "avg": false, - "current": "true", + "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "rightSide": false, + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10417,15 +19867,16 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"kubelet\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10433,10 +19884,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist duration", + "title": "Rate 
of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10451,7 +19902,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10459,7 +19910,7 @@ items: "show": true }, { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10467,20 +19918,7 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -10488,16 +19926,21 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 41 }, - "id": 21, + "id": 17, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -10506,11 +19949,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -10521,36 +19966,16 @@ items: ], "spaceLength": 10, "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", 
instance=~\"$instance\",code=~\"4..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, + "stack": true, + "steppedLine": false, + "targets": [ { - "expr": "sum(rate(rest_client_requests_total{job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -10558,10 +19983,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "RPC Rate", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10576,7 +20001,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10584,7 +20009,7 @@ items: "show": true }, { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10597,14 +20022,228 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" - }, + } + ], + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total, namespace)", + "hide": 0, + "includeAll": false, + 
"label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "prometheus", + "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + 
"value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Networking / Namespace (Workload)", + "uid": "bbb2a765a623ae38130206c7d94a160f", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-namespace-by-workload + namespace: monitoring +- apiVersion: v1 + data: + node-cluster-rsrc-use.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -10614,47 +20253,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 22, + "fill": 10, + "id": 1, "legend": { - "alignAsTable": "true", "avg": false, - "current": "true", + "current": false, "max": false, "min": false, - "rightSide": "true", - "show": "true", + "show": true, "total": false, - "values": "true" + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kubelet\", instance=~\"$instance\"}[5m])) by 
(instance, verb, url, le))", + "expr": "(\n instance:node_cpu_utilisation:rate1m{job=\"node-exporter\"}\n*\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n/ ignoring (instance) group_left\n sum without (instance) (instance:node_num_cpu:sum{job=\"node-exporter\"})\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{verb}} {{url}}", - "refId": "A" + "legendFormat": "{{instance}}", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -10662,7 +20296,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Request duration 99th quantile", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -10680,36 +20314,23 @@ items: }, "yaxes": [ { - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -10718,47 +20339,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 23, + "fill": 10, + "id": 2, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": 
"process_resident_memory_bytes{job=\"kubelet\",instance=~\"$instance\"}", + "expr": "(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}\n/ ignoring (instance) group_left\n count without (instance) (instance:node_load1_per_cpu:ratio{job=\"node-exporter\"})\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", - "refId": "A" + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -10766,7 +20382,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory", + "title": "CPU Saturation (load1 per CPU)", "tooltip": { "shared": false, "sort": 0, @@ -10784,23 +20400,35 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, + "max": 1, + "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -10809,47 +20437,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 24, + "fill": 10, + "id": 3, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"kubelet\",instance=~\"$instance\"}[5m])", + "expr": "(\n 
instance:node_memory_utilisation:ratio{job=\"node-exporter\"}\n/ ignoring (instance) group_left\n count without (instance) (instance:node_memory_utilisation:ratio{job=\"node-exporter\"})\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", - "refId": "A" + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -10857,7 +20480,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU usage", + "title": "Memory Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -10875,10 +20498,10 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -10887,8 +20510,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] }, @@ -10900,47 +20523,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 25, + "fill": 10, + "id": 4, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "go_goroutines{job=\"kubelet\",instance=~\"$instance\"}", + "expr": "instance:node_memory_swap_io_pages:rate1m{job=\"node-exporter\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", - "refId": "A" + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 } ], "thresholds": [ @@ -10948,7 +20566,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Goroutines", + "title": 
"Memory Saturation (Swapped Pages)", "tooltip": { "shared": false, "sort": 0, @@ -10966,11 +20584,11 @@ items: }, "yaxes": [ { - "format": "short", + "format": "rps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -10979,7 +20597,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] } @@ -10987,123 +20605,116 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(kubelet_runtime_operations{job=\"kubelet\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes / Kubelet", - "uid": "3138fa155d5915769fbded898ac09fd9", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-kubelet - namespace: monitoring -- apiVersion: v1 - data: - 
node-cluster-rsrc-use.json: |- - { - "annotations": { - "list": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/ Receive/", + "stack": "A" + }, + { + "alias": "/ Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Receive", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 + }, + { + "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Transmit", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 + } + ], + "thresholds": [ - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Utilisation (Bytes Receive/Transmit)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 
null, + "show": false + } + ] + }, { "aliasColors": { @@ -11113,7 +20724,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 1, + "id": 6, "legend": { "avg": false, "current": false, @@ -11134,7 +20745,15 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - + { + "alias": "/ Receive/", + "stack": "A" + }, + { + "alias": "/ Transmit/", + "stack": "B", + "transform": "negative-Y" + } ], "spaceLength": 10, "span": 6, @@ -11142,10 +20761,18 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n instance:node_cpu_utilisation:rate1m{job=\"node-exporter\"}\n*\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n/ ignoring (instance) group_left\n sum without (instance) (instance:node_num_cpu:sum{job=\"node-exporter\"})\n)\n", + "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} Receive", + "legendLink": "/dashboard/file/node-rsrc-use.json", + "step": 10 + }, + { + "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Transmit", "legendLink": "/dashboard/file/node-rsrc-use.json", "step": 10 } @@ -11155,7 +20782,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "Net Saturation (Drops Receive/Transmit)", "tooltip": { "shared": false, "sort": 0, @@ -11173,11 +20800,11 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "rps", "label": null, "logBase": 1, - "max": 1, - "min": 0, + "max": null, + "min": null, "show": true }, { @@ -11189,7 +20816,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -11199,7 
+20838,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 2, + "id": 7, "legend": { "avg": false, "current": false, @@ -11228,10 +20867,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}\n/ ignoring (instance) group_left\n count without (instance) (instance:node_load1_per_cpu:ratio{job=\"node-exporter\"})\n)\n", + "expr": "(\n instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"}\n/ ignoring (instance, device) group_left\n count without (instance, device) (instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"})\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{device}}", "legendLink": "/dashboard/file/node-rsrc-use.json", "step": 10 } @@ -11241,7 +20880,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Saturation (load1 per CPU)", + "title": "Disk IO Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -11275,19 +20914,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -11297,7 +20924,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 8, "legend": { "avg": false, "current": false, @@ -11326,10 +20953,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\"}\n/ ignoring (instance) group_left\n count without (instance) (instance:node_memory_utilisation:ratio{job=\"node-exporter\"})\n)\n", + "expr": "(\n instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"}\n/ ignoring (instance, device) group_left\n count without (instance, device) 
(instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"})\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{device}}", "legendLink": "/dashboard/file/node-rsrc-use.json", "step": 10 } @@ -11339,7 +20966,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Utilisation", + "title": "Disk IO Saturation", "tooltip": { "shared": false, "sort": 0, @@ -11373,7 +21000,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk IO", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -11383,7 +21022,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 4, + "id": 9, "legend": { "avg": false, "current": false, @@ -11407,12 +21046,12 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "instance:node_memory_swap_io_pages:rate1m{job=\"node-exporter\"}", + "expr": "(\n sum without (device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"} - node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n ) \n/ ignoring (instance) group_left\n sum without (instance, device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n )\n) \n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -11425,7 +21064,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Saturation (Swapped Pages)", + "title": "Disk Space Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -11443,10 +21082,10 @@ items: }, "yaxes": [ { - "format": "rps", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -11465,9 +21104,91 @@ items: 
"repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory", + "title": "Disk Space", "titleSize": "h6" - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "USE Method / Cluster", + "uid": "3e97d1d02672cdd0861f4c97c64f89b2", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-node-cluster-rsrc-use + namespace: monitoring +- apiVersion: v1 + data: + node-rsrc-use.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ { "collapse": false, "height": "250px", @@ -11480,57 +21201,41 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 5, + "fill": 1, + "id": 1, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/ Receive/", - "stack": "A" - }, - { - "alias": "/ Transmit/", - "stack": "B", - "transform": "negative-Y" - } + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ + ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Receive", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - }, - { - "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\"}", + "expr": "instance:node_cpu_utilisation:rate1m{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} Transmit", - "legendLink": "/dashboard/file/node-rsrc-use.json", + "legendFormat": "Utilisation", + "legendLink": null, "step": 10 } ], @@ -11539,7 +21244,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Utilisation (Bytes Receive/Transmit)", + "title": "CPU Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -11557,11 +21262,11 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -11582,19 +21287,19 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 6, + "fill": 1, + "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -11604,35 +21309,19 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "/ Receive/", - "stack": "A" - }, - { - "alias": "/ Transmit/", - "stack": "B", - "transform": "negative-Y" - } + ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{instance}} Receive", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - }, - { - "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\"}", + "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} Transmit", - "legendLink": "/dashboard/file/node-rsrc-use.json", + "legendFormat": "Saturation", + "legendLink": null, "step": 10 } ], @@ -11641,7 +21330,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Saturation (Drops Receive/Transmit)", + "title": "CPU Saturation (Load1 per CPU)", "tooltip": { "shared": false, "sort": 0, @@ -11659,11 +21348,11 @@ items: }, "yaxes": [ { - "format": "rps", + "format": "percentunit", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -11681,7 +21370,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "CPU", "titleSize": "h6" }, { @@ -11696,8 +21385,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "id": 3, "legend": { "avg": false, "current": false, @@ -11708,7 +21397,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -11722,15 +21411,15 @@ items: ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(\n instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"}\n/ ignoring (instance, device) group_left\n count without (instance, device) (instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"})\n)\n", + "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{device}}", - "legendLink": 
"/dashboard/file/node-rsrc-use.json", + "legendFormat": "Memory", + "legendLink": null, "step": 10 } ], @@ -11739,7 +21428,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Utilisation", + "title": "Memory Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -11760,7 +21449,7 @@ items: "format": "percentunit", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -11782,19 +21471,19 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 8, + "fill": 1, + "id": 4, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -11808,15 +21497,15 @@ items: ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(\n instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"}\n/ ignoring (instance, device) group_left\n count without (instance, device) (instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"})\n)\n", + "expr": "instance:node_memory_swap_io_pages:rate1m{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{device}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", + "legendFormat": "Swap IO", + "legendLink": null, "step": 10 } ], @@ -11825,7 +21514,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Saturation", + "title": "Memory Saturation (pages swapped per second)", "tooltip": { "shared": false, "sort": 0, @@ -11843,10 +21532,10 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -11865,7 +21554,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Disk IO", + "title": 
"Memory", "titleSize": "h6" }, { @@ -11880,8 +21569,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 9, + "fill": 1, + "id": 5, "legend": { "avg": false, "current": false, @@ -11892,7 +21581,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -11902,19 +21591,35 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - + { + "alias": "/Receive/", + "stack": "A" + }, + { + "alias": "/Transmit/", + "stack": "B", + "transform": "negative-Y" + } ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(\n sum without (device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"} - node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n ) \n/ ignoring (instance) group_left\n sum without (instance, device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n )\n) \n", + "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", + "legendFormat": "Receive", + "legendLink": null, + "step": 10 + }, + { + "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmit", + "legendLink": null, "step": 10 } ], @@ -11923,7 +21628,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk Space Utilisation", + "title": "Net Utilisation (Bytes Receive/Transmit)", "tooltip": { "shared": false, "sort": 0, @@ -11941,117 +21646,23 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "Bps", "label": null, "logBase": 1, - "max": 1, - "min": 0, + "max": 
null, + "min": null, "show": true }, { "format": "short", "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk Space", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "USE Method / Cluster", - "uid": "3e97d1d02672cdd0861f4c97c64f89b2", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-node-cluster-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - node-rsrc-use.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, { "aliasColors": { @@ -12061,13 +21672,13 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 1, + "id": 6, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": false, + "show": true, "total": false, "values": false }, @@ -12082,7 +21693,15 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - + { + "alias": "/Receive/", + "stack": "A" + }, + { + "alias": "/Transmit/", + "stack": "B", + "transform": "negative-Y" + } ], 
"spaceLength": 10, "span": 6, @@ -12090,10 +21709,18 @@ items: "steppedLine": false, "targets": [ { - "expr": "instance:node_cpu_utilisation:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Utilisation", + "legendFormat": "Receive drops", + "legendLink": null, + "step": 10 + }, + { + "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmit drops", "legendLink": null, "step": 10 } @@ -12103,7 +21730,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "Net Saturation (Drops Receive/Transmit)", "tooltip": { "shared": false, "sort": 0, @@ -12121,11 +21748,11 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "rps", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -12137,7 +21764,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Net", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -12147,13 +21786,13 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "id": 7, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": false, + "show": true, "total": false, "values": false }, @@ -12176,10 +21815,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Saturation", + "legendFormat": "{{device}}", 
"legendLink": null, "step": 10 } @@ -12189,7 +21828,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Saturation (Load1 per CPU)", + "title": "Disk IO Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -12223,19 +21862,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -12245,7 +21872,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 3, + "id": 8, "legend": { "avg": false, "current": false, @@ -12274,10 +21901,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", job=\"node-exporter\", instance=\"$instance\"}", + "expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Memory", + "legendFormat": "{{device}}", "legendLink": null, "step": 10 } @@ -12287,7 +21914,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Utilisation", + "title": "Disk IO Saturation", "tooltip": { "shared": false, "sort": 0, @@ -12321,7 +21948,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk IO", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -12331,7 +21970,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 9, "legend": { "avg": false, "current": false, @@ -12355,15 +21994,15 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "instance:node_memory_swap_io_pages:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "1 -\n(\n max without (mountpoint, 
fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n/\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Swap IO", + "legendFormat": "{{device}}", "legendLink": null, "step": 10 } @@ -12373,7 +22012,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Saturation (pages swapped per second)", + "title": "Disk Space Utilisation", "tooltip": { "shared": false, "sort": 0, @@ -12391,7 +22030,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -12413,12 +22052,128 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory", + "title": "Disk Space", "titleSize": "h6" - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "instance", + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(up{job=\"node-exporter\"}, instance)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "USE Method 
/ Node", + "uid": "fac67cfbe174d3ef53eb473d73d9212f", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-node-rsrc-use + namespace: monitoring +- apiVersion: v1 + data: + nodes.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -12429,12 +22184,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 5, + "gridPos": { + + }, + "id": 2, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -12444,42 +22204,27 @@ items: "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Receive/", - "stack": "A" - }, - { - "alias": "/Transmit/", - "stack": "B", - "transform": "negative-Y" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Receive", - "legendLink": null, - "step": 10 - }, - { - "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", 
mode=\"idle\", instance=\"$instance\"})\n)\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Transmit", - "legendLink": null, - "step": 10 + "interval": "1m", + "intervalFactor": 5, + "legendFormat": "{{cpu}}", + "refId": "A" } ], "thresholds": [ @@ -12487,7 +22232,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Utilisation (Bytes Receive/Transmit)", + "title": "CPU Usage", "tooltip": { "shared": false, "sort": 0, @@ -12505,20 +22250,20 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, + "max": 1, + "min": 0, "show": true }, { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, - "show": false + "max": 1, + "min": 0, + "show": true } ] }, @@ -12530,13 +22275,18 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 6, + "fill": 0, + "gridPos": { + + }, + "id": 3, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -12546,21 +22296,14 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ - { - "alias": "/Receive/", - "stack": "A" - }, - { - "alias": "/Transmit/", - "stack": "B", - "transform": "negative-Y" - } + ], "spaceLength": 10, "span": 6, @@ -12568,20 +22311,32 @@ items: "steppedLine": false, "targets": [ { - "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Receive drops", - "legendLink": null, - "step": 10 + "legendFormat": "1m load average", + "refId": "A" }, { - "expr": 
"instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Transmit drops", - "legendLink": null, - "step": 10 + "legendFormat": "5m load average", + "refId": "B" + }, + { + "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "15m load average", + "refId": "C" + }, + { + "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "logical cores", + "refId": "D" } ], "thresholds": [ @@ -12589,7 +22344,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Net Saturation (Drops Receive/Transmit)", + "title": "Load Average", "tooltip": { "shared": false, "sort": 0, @@ -12607,11 +22362,11 @@ items: }, "yaxes": [ { - "format": "rps", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -12619,8 +22374,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] } @@ -12628,13 +22383,14 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": true, - "title": "Net", - "titleSize": "h6" + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -12645,12 +22401,17 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 7, + "gridPos": { + + }, + "id": 4, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -12660,26 +22421,47 @@ items: "links": [ ], - "nullPointMode": "null as zero", + 
"nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 9, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 + "legendFormat": "memory used", + "refId": "A" + }, + { + "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory buffers", + "refId": "B" + }, + { + "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory cached", + "refId": "C" + }, + { + "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory free", + "refId": "D" } ], "thresholds": [ @@ -12687,7 +22469,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Utilisation", + "title": "Memory Usage", "tooltip": { "shared": false, "sort": 0, @@ -12705,7 +22487,7 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -12713,15 +22495,112 @@ items: "show": true }, { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, - "show": false + "min": 0, + "show": true } ] }, + { + "cacheTimeout": 
null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Memory Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -12730,13 +22609,18 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 8, + "fill": 0, + "gridPos": { + + }, + "id": 6, "legend": { + "alignAsTable": false, "avg": false, "current": 
false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false @@ -12746,13 +22630,21 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ - + { + "alias": "/ read| written/", + "yaxis": 1 + }, + { + "alias": "/ io time/", + "yaxis": 2 + } ], "spaceLength": 10, "span": 6, @@ -12760,12 +22652,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", "format": "time_series", + "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{device}} read", + "refId": "A" + }, + { + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{device}} written", + "refId": "B" + }, + { + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{device}} io time", + "refId": "C" } ], "thresholds": [ @@ -12773,7 +22681,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk IO Saturation", + "title": "Disk I/O", "tooltip": { "shared": false, "sort": 0, @@ -12791,35 +22699,23 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "short", + "format": "s", "label": null, 
"logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk IO", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -12829,13 +22725,18 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 9, + "gridPos": { + + }, + "id": 7, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "show": false, + "rightSide": false, + "show": true, "total": false, "values": false }, @@ -12844,192 +22745,91 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "1 -\n(\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n/\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - 
"title": "Disk Space", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "instance", - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(up{job=\"node-exporter\"}, instance)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "USE Method / Node", - "uid": "fac67cfbe174d3ef53eb473d73d9212f", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-node-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - nodes.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "used", + "color": "#E0B400" + }, + { + "alias": "available", + "color": "#73BF69" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "format": 
"time_series", + "intervalFactor": 2, + "legendFormat": "used", + "refId": "A" + }, + { + "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "available", + "refId": "B" + } + ], + "thresholds": [ - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "refresh": "", - "rows": [ + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, { "collapse": false, "collapsed": false, @@ -13042,11 +22842,11 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { }, - "id": 2, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -13074,15 +22874,15 @@ items: ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", + "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", "format": "time_series", 
"interval": "1m", - "intervalFactor": 5, - "legendFormat": "{{cpu}}", + "intervalFactor": 2, + "legendFormat": "{{device}}", "refId": "A" } ], @@ -13091,7 +22891,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Network Received", "tooltip": { "shared": false, "sort": 0, @@ -13109,18 +22909,18 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true } @@ -13138,7 +22938,7 @@ items: "gridPos": { }, - "id": 3, + "id": 9, "legend": { "alignAsTable": false, "avg": false, @@ -13170,83 +22970,178 @@ items: "steppedLine": false, "targets": [ { - "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", "format": "time_series", + "interval": "1m", "intervalFactor": 2, - "legendFormat": "1m load average", + "legendFormat": "{{device}}", "refId": "A" - }, - { - "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5m load average", - "refId": "B" - }, + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Transmitted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "15m load average", - "refId": "C" + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": 
"count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "logical cores", - "refId": "D" + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "thresholds": [ + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Nodes", + "uid": "fa49a4706d07a042595b664c87fb33ea", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-nodes + namespace: monitoring +- apiVersion: v1 + data: + persistentvolumesusage.json: |- + { + "__inputs": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Load Average", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - 
"buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "__requires": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ { "collapse": false, "collapsed": false, @@ -13263,17 +23158,17 @@ items: "gridPos": { }, - "id": 4, + "id": 2, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, "rightSide": false, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -13295,32 +23190,18 @@ items: "steppedLine": false, "targets": [ { - "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory used", + "intervalFactor": 1, + 
"legendFormat": "Used Space", "refId": "A" }, { - "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory buffers", + "intervalFactor": 1, + "legendFormat": "Free Space", "refId": "B" - }, - { - "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory cached", - "refId": "C" - }, - { - "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory free", - "refId": "D" } ], "thresholds": [ @@ -13328,7 +23209,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Volume Space Usage", "tooltip": { "shared": false, "sort": 0, @@ -13384,7 +23265,7 @@ items: "gridPos": { }, - "id": 5, + "id": 3, "interval": null, "links": [ @@ -13424,7 +23305,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n", + "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -13432,7 +23313,7 @@ items: } ], "thresholds": "80, 90", - "title": "Memory 
Usage", + "title": "Volume Space Usage", "tooltip": { "shared": false }, @@ -13468,21 +23349,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 0, + "fill": 1, "gridPos": { }, - "id": 6, + "id": 4, "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, "rightSide": false, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -13496,43 +23377,26 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - { - "alias": "/ read| written/", - "yaxis": 1 - }, - { - "alias": "/ io time/", - "yaxis": 2 - } + ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 9, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}} read", + "intervalFactor": 1, + "legendFormat": "Used inodes", "refId": "A" }, { - "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}} written", + 
"intervalFactor": 1, + "legendFormat": " Free inodes", "refId": "B" - }, - { - "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}} io time", - "refId": "C" } ], "thresholds": [ @@ -13540,7 +23404,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Disk I/O", + "title": "Volume inodes Usage", "tooltip": { "shared": false, "sort": 0, @@ -13558,452 +23422,794 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], "datasource": "$datasource", - "fill": 1, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, + "id": 5, + "interval": null, "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "mappingType": 1, + "mappingTypes": [ { - "alias": "used", - "color": "#E0B400" + "name": "value to text", + "value": 1 }, { - "alias": "available", - "color": "#73BF69" + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, 
+ "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" } ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", "targets": [ { - "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", + "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "used", + "legendFormat": "", "refId": "A" - }, - { - "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "available", - "refId": "B" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Usage", + "thresholds": "80, 90", + "title": "Volume inodes Usage", "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "shared": false }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - 
"show": true + "op": "=", + "text": "N/A", + "value": "null" } - ] + ], + "valueName": "current" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "PersistentVolumeClaim", + "multi": false, + "name": "volume", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", 
+ "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-persistentvolumesusage + namespace: monitoring +- apiVersion: v1 + data: + pod-total.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "gridPos": { - - }, - "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "cacheTimeout": null, + "colorBackground": false, + 
"colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [ + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Network Received", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "override": { - ] }, - "yaxes": [ + "thresholds": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "color": "dark-green", + "index": 0, + "value": null }, { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "gridPos": { - - }, - "id": 9, - "legend": { - "alignAsTable": false, - 
"avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, { - "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" + "color": "dark-red", + "index": 2, + "value": 7000000000 } ], - "thresholds": [ + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + 
"minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" ], - "timeFrom": null, - "timeShift": null, - "title": "Network Transmitted", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "mappings": [ + + ], + "override": { - ] }, - "yaxes": [ + "thresholds": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "color": "dark-green", + "index": 0, + "value": null }, { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 } - ] + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 
189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" }, - { - "allValue": null, - "current": { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "instance", - "options": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 
0, - "tagValuesQuery": "", - "tags": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Nodes", - "uid": "fa49a4706d07a042595b664c87fb33ea", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-nodes - namespace: monitoring -- apiVersion: v1 - data: - persistentvolumesusage.json: |- - { - "__inputs": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { - ], - "__requires": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + 
}, + "lines": true, + "linewidth": 2, + "links": [ - ], - "annotations": { - "list": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ - ], - "refresh": "", - "rows": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 8, "panels": [ { "aliasColors": { @@ -14012,29 +24218,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 21 }, - "id": 2, + "id": 9, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": 
false, "rightSide": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -14044,23 +24257,17 @@ items: ], "spaceLength": 10, - "span": 9, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used Space", - "refId": "A" - }, - { - "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Free Space", - "refId": "B" + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -14068,10 +24275,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Volume Space Usage", + "title": "Rate of Received Packets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -14086,7 +24293,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14094,7 +24301,7 @@ items: "show": true }, { - "format": "bytes", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14104,101 +24311,123 @@ items: ] }, { - "cacheTimeout": null, 
- "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 21 }, - "id": 3, - "interval": null, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", 
job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], - "thresholds": "80, 90", - "title": "Volume Space Usage", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, "panels": [ { "aliasColors": { @@ -14207,29 +24436,36 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 32 }, - "id": 4, + "id": 12, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": 
false, + "min": false, "rightSide": false, "show": true, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -14239,23 +24475,17 @@ items: ], "spaceLength": 10, - "span": 9, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used inodes", - "refId": "A" - }, - { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": " Free inodes", - "refId": "B" + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -14263,10 +24493,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Volume inodes Usage", + "title": "Rate of Received Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -14281,7 +24511,7 @@ items: }, "yaxes": [ { - "format": "none", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14289,7 +24519,7 @@ items: "show": true }, { - "format": "none", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14299,140 +24529,181 @@ items: ] }, { - 
"cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 32 }, - "id": 5, - "interval": null, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "expr": 
"sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], - "thresholds": "80, 90", - "title": "Volume inodes Usage", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" } ], - "schemaVersion": 14, + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, "style": "dark", "tags": [ - "kubernetes-mixin" + ], "templating": { "list": [ { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "kube-system", + "value": "kube-system" }, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, + "includeAll": true, "label": null, - "name": "datasource", + "multi": false, + "name": "namespace", "options": [ ], - "query": "prometheus", + "query": "label_values(container_network_receive_packets_total, namespace)", "refresh": 1, "regex": "", - 
"type": "datasource" + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false }, { - "allValue": null, + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "", + "value": "" }, - "datasource": "$datasource", - "hide": 2, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", + "hide": 0, "includeAll": false, - "label": "cluster", + "label": null, "multi": false, - "name": "cluster", + "name": "pod", "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", - "refresh": 2, + "query": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", + "refresh": 1, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -14443,60 +24714,88 @@ items: }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", + "datasource": "prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": null, "multi": false, - "name": "namespace", + "name": "resolution", "options": [ - + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", + "query": "30s,5m,1h", "refresh": 2, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", - "hide": 0, + 
"datasource": "prometheus", + "hide": 2, "includeAll": false, - "label": "PersistentVolumeClaim", + "label": null, "multi": false, - "name": "volume", + "name": "interval", "options": [ - + { + "selected": true, + "text": "4h", + "value": "4h" + } ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "query": "4h", "refresh": 2, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false } ] }, "time": { - "from": "now-7d", + "from": "now-1h", "to": "now" }, "timepicker": { @@ -14525,13 +24824,13 @@ items: ] }, "timezone": "", - "title": "Kubernetes / Persistent Volumes", - "uid": "919b92a8e8041bd567af9edab12c840c", + "title": "Kubernetes / Networking / Pod", + "uid": "7a18067ce943a40ae25454675c19ff5c", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-persistentvolumesusage + name: grafana-dashboard-pod-total namespace: monitoring - apiVersion: v1 data: @@ -15070,7 +25369,7 @@ items: "query": "label_values(kube_pod_info, cluster)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -15096,7 +25395,7 @@ items: "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -15122,7 +25421,7 @@ items: "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -15148,7 +25447,7 @@ items: "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -18509,7 +28808,7 @@ items: "query": 
"label_values(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\"}, instance)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -19551,7 +29850,7 @@ items: "query": "label_values(process_cpu_seconds_total{job=\"kube-scheduler\"}, instance)", "refresh": 2, "regex": "", - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -19650,7 +29949,270 @@ items: "gridPos": { }, - "id": 2, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "cores", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "CPU", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": 
"value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "GB", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Memory", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "Bps", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Network", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "100px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, "interval": null, "links": [ @@ -19669,7 +30231,7 @@ items: "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": "cores", + "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", @@ -19680,16 +30242,17 @@ items: "to": "null" } ], - "span": 4, + "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, "lineColor": "rgb(31, 120, 193)", - "show": true + "show": false }, "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", 
statefulset=\"$statefulset\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -19697,7 +30260,7 @@ items: } ], "thresholds": "", - "title": "CPU", + "title": "Desired Replicas", "tooltip": { "shared": false }, @@ -19733,7 +30296,7 @@ items: "gridPos": { }, - "id": 3, + "id": 6, "interval": null, "links": [ @@ -19752,7 +30315,7 @@ items: "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": "GB", + "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", @@ -19763,16 +30326,17 @@ items: "to": "null" } ], - "span": 4, + "span": 3, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, "lineColor": "rgb(31, 120, 193)", - "show": true + "show": false }, "tableColumn": "", "targets": [ { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -19780,7 +30344,7 @@ items: } ], "thresholds": "", - "title": "Memory", + "title": "Replicas of current version", "tooltip": { "shared": false }, @@ -19816,430 +30380,1135 @@ items: "gridPos": { }, - "id": 4, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": 
false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Observed Generation", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 8, "interval": null, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Metadata Generation", + "tooltip": { + "shared": false + }, + "type": "singlestat", + 
"valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas specified", + "refId": "A" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "Bps", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas created", + "refId": "B" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": 
"rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ + "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "ready", + "refId": "C" + }, { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "replicas of current version", + "refId": "D" + }, + { + "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "updated", + "refId": "E" } ], - "thresholds": "", - "title": "Network", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Replicas", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "valueName": "current" 
+ ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Name", + "multi": false, + "name": "statefulset", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + 
"to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / StatefulSets", + "uid": "a31c1f46e6f727cb37c0d731a7245005", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-statefulset + namespace: monitoring +- apiVersion: v1 + data: + workload-total.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, - "height": "100px", + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) 
mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Received", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + + }, + "breakpoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "interval": null, + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true + }, + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, "panels": [ { + "aliasColors": { + + }, + "breakpoint": "50%", "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "combine": { + "label": "Others", + "threshold": 0 }, + "datasource": "prometheus", + "fontSize": 
"80%", + "format": "Bps", "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 11 }, - "id": 5, + "id": 6, "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true }, - "tableColumn": "", + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{pod}}", "refId": "A" } ], - "thresholds": "", - "title": "Desired Replicas", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], + "title": "Average Rate of Bytes Received", + "type": "grafana-piechart-panel", "valueName": "current" }, { + "aliasColors": { + + }, + "breakpoint": "50%", "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ 
- "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "combine": { + "label": "Others", + "threshold": 0 }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "Bps", "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 11 }, - "id": 6, + "id": 7, "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "legend": { + "percentage": true, + "percentageDecimals": null, + "show": true, + "values": true }, - "tableColumn": "", + "legendType": "Right side", + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "donut", "targets": [ { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", + "instant": null, + "intervalFactor": 1, + "legendFormat": "{{pod}}", "refId": "A" } ], - "thresholds": "", - "title": "Replicas of current version", - "tooltip": { - "shared": false - }, - "type": 
"singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], + "title": "Average Rate of Bytes Transmitted", + "type": "grafana-piechart-panel", "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 8, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth HIstory", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + 
"shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "panels": [ + { + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 22 }, - "id": 7, - "interval": null, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", 
cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], - "thresholds": "", - "title": "Observed Generation", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 12, + "y": 22 }, - "id": 8, - "interval": null, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": 
false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], - "thresholds": "", - "title": "Metadata Generation", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", "tooltip": { - "shared": false + "shared": true, + "sort": 2, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + 
"show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 14, "panels": [ { "aliasColors": { @@ -20248,16 +31517,21 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "fill": 1, + "datasource": "prometheus", + "fill": 2, "gridPos": { - + "h": 9, + "w": 12, + "x": 0, + "y": 23 }, - "id": 9, + "id": 15, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -20266,11 +31540,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -20280,43 +31556,116 @@ items: ], "spaceLength": 10, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas specified", - "refId": "A" - }, - { - "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, 
pod)", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas created", - "refId": "B" - }, + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "ready", - "refId": "C" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas of current version", - "refId": "D" - }, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": 
false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ { - "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "updated", - "refId": "E" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -20324,10 +31673,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Replicas", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -20342,19 +31691,19 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -20363,53 +31712,79 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Errors", "titleSize": "h6", "type": "row" } ], - "schemaVersion": 14, + "refresh": "30s", + "rows": [ + + ], + "schemaVersion": 18, "style": "dark", 
"tags": [ - "kubernetes-mixin" + ], "templating": { "list": [ { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "kube-system", + "value": "kube-system" }, + "datasource": "prometheus", + "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, + "includeAll": true, "label": null, - "name": "datasource", + "multi": false, + "name": "namespace", "options": [ ], - "query": "prometheus", + "query": "label_values(container_network_receive_packets_total, namespace)", "refresh": 1, "regex": "", - "type": "datasource" + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "", + "value": "" }, - "datasource": "$datasource", - "hide": 2, + "datasource": "prometheus", + "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)", + "hide": 0, "includeAll": false, - "label": "cluster", + "label": null, "multi": false, - "name": "cluster", + "name": "workload", "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation, cluster)", - "refresh": 2, + "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)", + "refresh": 1, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -20420,21 +31795,27 @@ items: }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "deployment", + "value": "deployment" }, - "datasource": "$datasource", + "datasource": "prometheus", + "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": null, "multi": false, - "name": "namespace", + "name": 
"type", "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", - "refresh": 2, + "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", + "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [ @@ -20446,28 +31827,82 @@ items: }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - + "text": "5m", + "value": "5m" }, - "datasource": "$datasource", + "datasource": "prometheus", "hide": 0, "includeAll": false, - "label": "Name", + "label": null, "multi": false, - "name": "statefulset", + "name": "resolution", "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)", + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "prometheus", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", "refresh": 2, "regex": "", - "sort": 0, + "skipUrlSync": false, + "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", - "type": "query", + "type": "interval", "useTags": false } ] @@ -20502,12 +31937,12 @@ items: ] }, "timezone": "", - "title": "Kubernetes / StatefulSets", - "uid": "a31c1f46e6f727cb37c0d731a7245005", + "title": "Kubernetes / 
Networking / Workload", + "uid": "728bf77cc1166d2f3133bf25846876cc", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-statefulset + name: grafana-dashboard-workload-total namespace: monitoring kind: ConfigMapList diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index e4bd4d734242e9e9b180855fb02f2ead12b2d9de..aec58617ea8312dc7538f759193528ec0bca7839 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -45,6 +45,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/apiserver name: grafana-dashboard-apiserver readOnly: false + - mountPath: /grafana-dashboard-definitions/0/cluster-total + name: grafana-dashboard-cluster-total + readOnly: false - mountPath: /grafana-dashboard-definitions/0/controller-manager name: grafana-dashboard-controller-manager readOnly: false @@ -69,6 +72,12 @@ spec: - mountPath: /grafana-dashboard-definitions/0/kubelet name: grafana-dashboard-kubelet readOnly: false + - mountPath: /grafana-dashboard-definitions/0/namespace-by-pod + name: grafana-dashboard-namespace-by-pod + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/namespace-by-workload + name: grafana-dashboard-namespace-by-workload + readOnly: false - mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use name: grafana-dashboard-node-cluster-rsrc-use readOnly: false @@ -81,6 +90,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage name: grafana-dashboard-persistentvolumesusage readOnly: false + - mountPath: /grafana-dashboard-definitions/0/pod-total + name: grafana-dashboard-pod-total + readOnly: false - mountPath: /grafana-dashboard-definitions/0/pods name: grafana-dashboard-pods readOnly: false @@ -99,6 +111,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/statefulset name: grafana-dashboard-statefulset readOnly: false + - mountPath: /grafana-dashboard-definitions/0/workload-total + name: grafana-dashboard-workload-total + readOnly: 
false nodeSelector: beta.kubernetes.io/os: linux securityContext: @@ -117,6 +132,9 @@ spec: - configMap: name: grafana-dashboard-apiserver name: grafana-dashboard-apiserver + - configMap: + name: grafana-dashboard-cluster-total + name: grafana-dashboard-cluster-total - configMap: name: grafana-dashboard-controller-manager name: grafana-dashboard-controller-manager @@ -141,6 +159,12 @@ spec: - configMap: name: grafana-dashboard-kubelet name: grafana-dashboard-kubelet + - configMap: + name: grafana-dashboard-namespace-by-pod + name: grafana-dashboard-namespace-by-pod + - configMap: + name: grafana-dashboard-namespace-by-workload + name: grafana-dashboard-namespace-by-workload - configMap: name: grafana-dashboard-node-cluster-rsrc-use name: grafana-dashboard-node-cluster-rsrc-use @@ -153,6 +177,9 @@ spec: - configMap: name: grafana-dashboard-persistentvolumesusage name: grafana-dashboard-persistentvolumesusage + - configMap: + name: grafana-dashboard-pod-total + name: grafana-dashboard-pod-total - configMap: name: grafana-dashboard-pods name: grafana-dashboard-pods @@ -171,3 +198,6 @@ spec: - configMap: name: grafana-dashboard-statefulset name: grafana-dashboard-statefulset + - configMap: + name: grafana-dashboard-workload-total + name: grafana-dashboard-workload-total diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 007b354880fadaf9e0bb140ded29cc54a0af55dd..88a02050211c6790534f0a6d5bb07b528dd2dfbc 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -79,6 +79,22 @@ spec: rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m]) ) * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate + - expr: | + container_memory_working_set_bytes{job="kubelet", image!=""} + * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) + record: 
node_namespace_pod_container:container_memory_working_set_bytes + - expr: | + container_memory_rss{job="kubelet", image!=""} + * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) + record: node_namespace_pod_container:container_memory_rss + - expr: | + container_memory_cache{job="kubelet", image!=""} + * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) + record: node_namespace_pod_container:container_memory_cache + - expr: | + container_memory_swap{job="kubelet", image!=""} + * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info) + record: node_namespace_pod_container:container_memory_swap - expr: | sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace) record: namespace:container_memory_usage_bytes:sum @@ -583,6 +599,16 @@ spec: for: 15m labels: severity: critical + - alert: KubeContainerWaiting + annotations: + message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} + has been in waiting state for longer than 1 hour. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting + expr: | + sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0 + for: 1h + labels: + severity: warning - alert: KubeDaemonSetNotScheduled annotations: message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset @@ -797,6 +823,14 @@ spec: for: 15m labels: severity: warning + - alert: KubeNodeUnreachable + annotations: + message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.' 
+ runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable + expr: | + kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1 + labels: + severity: warning - alert: KubeVersionMismatch annotations: message: There are {{ $value }} different semantic versions of Kubernetes