diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index b9304a998f3fd8db8b967eb17bf18081d2af42c3..475d5d93c926602076adaaf2a0125b991d86032a 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -59,8 +59,8 @@ "subdir": "" } }, - "version": "ec3e85f45b5691d54a02ab38ed654c3c9f736fe5", - "sum": "6KgRTpd101espi7a7CDpkqN0yaIPmENxxlAXqGcCWhk=" + "version": "e15ab56a4eea721d1e958888fd13c4b95af4d17d", + "sum": "voj/Pfit89sI+xBCyFV8oEV4UgnsaJZ2VrOU7s9jZe8=" }, { "source": { diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 2ab0f97b863988137fef047a59e9c9d3a48906ad..6fd259eaf0d963fdf276faa1a8834168f29ac9e4 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -7199,124 +7199,10 @@ items: "showTitle": true, "title": "Rate of Packets Dropped", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(node_cpu_seconds_total, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Cluster", - "uid": "efa86fd1d0c121a26444b636a3f509a8", - "version": 0 - } - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/component: grafana - app.kubernetes.io/name: grafana - app.kubernetes.io/part-of: kube-prometheus - app.kubernetes.io/version: 7.5.4 - name: grafana-dashboard-k8s-resources-cluster - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-namespace.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ + }, { "collapse": false, - "height": "100px", + "height": "250px", "panels": [ { "aliasColors": { @@ -7326,9 +7212,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 1, + "decimals": -1, + "fill": 10, + "id": 20, "legend": { "avg": false, "current": false, @@ -7339,7 +7225,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -7352,28 +7238,31 @@ items: ], "spaceLength": 10, - "span": 3, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m])))", "format": "time_series", - "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation (from requests)", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -7410,9 +7299,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 2, + "fill": 10, + "id": 21, "legend": { "avg": false, "current": false, @@ -7423,7 +7311,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -7436,28 +7324,31 @@ items: ], "spaceLength": 10, - "span": 3, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", "format": "time_series", - "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation (from limits)", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -7469,7 +7360,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -7485,7 +7376,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -7495,8 +7398,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 3, + "id": 22, "legend": { "avg": false, "current": false, @@ -7519,61 +7421,649 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, - "span": 3, + "span": 12, "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilization (from requests)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { + "alias": "IOPS(Writes)", + "colorMode": null, + "colors": [ - }, + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Storage IO", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(node_cpu_seconds_total, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 7.5.4 + name: grafana-dashboard-k8s-resources-cluster + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation (from requests)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation (from limits)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilization (from requests)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, "bars": false, "dashLength": 10, "dashes": false, @@ -8748,70 +9238,255 @@ items: "colorMode": null, "colors": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Network Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, "targets": [ { "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", - "format": "table", - "instant": true, + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "C", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 - }, + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "F", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -8820,14 +9495,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Current Network Usage", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -8839,7 +9513,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -8861,7 +9535,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Current Network Usage", + "title": "Bandwidth", "titleSize": "h6" }, { @@ -8877,7 +9551,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 10, + "id": 12, "legend": { "avg": false, "current": false, @@ -8906,7 +9580,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -8919,7 +9593,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "Rate of Received Packets", "tooltip": { "shared": false, "sort": 0, @@ -8963,7 +9637,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 11, + "id": 13, "legend": { "avg": false, "current": false, @@ -8992,7 +9666,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9005,7 +9679,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 0, @@ -9045,7 +9719,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Bandwidth", + "title": "Rate of Packets", "titleSize": "h6" }, { @@ -9061,7 +9735,93 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 12, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 15, "legend": { "avg": false, "current": false, @@ -9090,7 +9850,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9103,7 +9863,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -9137,7 +9897,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -9146,8 +9918,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "decimals": -1, "fill": 10, - "id": 13, + "id": 16, "legend": { "avg": false, "current": false, @@ -9176,7 +9949,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9189,7 +9962,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 0, @@ -9207,7 +9980,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9223,19 +9996,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rate of Packets", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -9245,7 +10006,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 14, + "id": 17, "legend": { "avg": false, "current": false, @@ -9274,7 +10035,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -9287,7 +10048,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 0, @@ -9321,7 +10082,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -9330,8 +10103,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 15, + "fill": 1, + "id": 18, "legend": { "avg": false, "current": false, @@ -9342,7 +10115,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -9354,17 +10127,223 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, - "span": 6, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)", - "format": "time_series", + "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", "step": 10 } ], @@ -9373,13 +10352,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", + "title": "Current Storage IO", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -9391,7 +10371,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9413,7 +10393,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Rate of Packets Dropped", + "title": "Storage IO - Distribution", "titleSize": "h6" } ], @@ -10536,8 +11516,244 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 1, + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "requests", + "color": "#F2495C", + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "limits", + "color": "#FF9830", + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 2, + "legend": { + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.25, + "yaxis": "left" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Throttling", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Throttling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, "legend": { "avg": false, "current": false, @@ -10548,7 +11764,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -10558,52 +11774,193 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ { - "alias": "requests", - "color": "#F2495C", - "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "alias": "limits", - "color": "#FF9830", - "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" } ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, + "legendFormat": "", + "refId": "A", "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", - "format": "time_series", + "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "requests", - "legendLink": null, + "legendFormat": "", + "refId": "B", "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "limits", - "legendLink": null, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -10612,13 +11969,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU Quota", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -10652,7 +12010,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "CPU Quota", "titleSize": "h6" }, { @@ -10668,11 +12026,11 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 2, + "id": 4, "legend": { "avg": false, - "current": true, - "max": true, + "current": false, + "max": false, "min": false, "show": true, "total": false, @@ -10689,7 +12047,26 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - + { + "alias": "requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } ], "spaceLength": 10, "span": 12, @@ -10697,27 +12074,36 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{container}}", + "legendFormat": "limits", "legendLink": null, "step": 10 } ], "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.25, - "yaxis": "left" - } + ], "timeFrom": null, "timeShift": null, - "title": "CPU Throttling", + "title": "Memory Usage", "tooltip": { "shared": false, "sort": 0, @@ -10735,10 +12121,10 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "bytes", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -10757,7 +12143,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Throttling", + "title": "Memory Usage", "titleSize": "h6" }, { @@ -10773,7 +12159,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 3, + "id": 5, "legend": { "avg": false, "current": false, @@ -10808,7 +12194,7 @@ items: "type": "hidden" }, { - "alias": "CPU Usage", + "alias": "Memory Usage", "colorMode": null, "colors": [ @@ -10824,10 +12210,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests", + "alias": "Memory Requests", "colorMode": null, "colors": [ @@ -10843,10 +12229,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests %", + "alias": "Memory Requests %", "colorMode": null, "colors": [ @@ -10865,7 +12251,7 @@ items: "unit": "percentunit" }, { - "alias": "CPU Limits", + "alias": "Memory Limits", "colorMode": null, "colors": [ @@ -10881,10 +12267,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Limits %", + "alias": "Memory Limits %", "colorMode": null, "colors": [ @@ -10902,6 +12288,63 @@ items: "type": "number", "unit": "percentunit" }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, { "alias": "Container", "colorMode": null, @@ -10939,7 +12382,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10948,7 +12391,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10957,7 +12400,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10966,7 +12409,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10975,13 +12418,40 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ @@ -10989,7 +12459,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Memory Quota", "tooltip": { "shared": false, "sort": 0, @@ -11030,7 +12500,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Memory Quota", "titleSize": "h6" }, { @@ -11046,7 +12516,8 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 4, + "id": 6, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -11067,53 +12538,105 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - }, - { - "alias": "limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{container}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 - }, + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "requests", - "legendLink": null, - "step": 10 + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -11123,7 +12646,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, "sort": 0, @@ -11141,7 +12664,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -11163,7 +12686,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Bandwidth", "titleSize": "h6" }, { @@ -11178,8 +12701,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 5, + "fill": 10, + "id": 8, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -11190,7 +12714,7 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -11200,277 +12724,106 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Usage (RSS)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Swap)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Container", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "container", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 - }, + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "H", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -11479,14 +12832,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -11498,7 +12850,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -11520,7 +12872,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Rate of Packets", "titleSize": "h6" }, { @@ -11536,7 +12888,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 6, + "id": 10, "interval": "1m", "legend": { "avg": false, @@ -11566,7 +12918,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -11579,7 +12931,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -11623,7 +12975,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 7, + "id": 11, "interval": "1m", "legend": { "avg": false, @@ -11653,7 +13005,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -11666,7 +13018,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, "sort": 0, @@ -11706,7 +13058,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Bandwidth", + "title": "Rate of Packets Dropped", "titleSize": "h6" }, { @@ -11721,9 +13073,9 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "decimals": -1, "fill": 10, - "id": 8, - "interval": "1m", + "id": 12, "legend": { "avg": false, "current": false, @@ -11752,10 +13104,18 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "Reads", + "legendLink": null, + "step": 10 + }, + { + "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes", "legendLink": null, "step": 10 } @@ -11765,7 +13125,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "IOPS", "tooltip": { "shared": false, "sort": 0, @@ -11783,7 +13143,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -11809,8 +13169,114 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, - "interval": "1m", + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Reads", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "ThroughPut", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution(Pod - Read & Writes)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": -1, + "fill": 10, + "id": 14, "legend": { "avg": false, "current": false, @@ -11839,10 +13305,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "expr": "ceil(sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m])))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } @@ -11852,7 +13318,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, "sort": 0, @@ -11870,7 +13336,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -11886,19 +13352,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rate of Packets", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -11908,8 +13362,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 10, - "interval": "1m", + "id": 15, "legend": { "avg": false, "current": false, @@ -11938,10 +13391,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } @@ -11951,7 +13404,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, "sort": 0, @@ -11985,7 +13438,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution(Containers)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -11994,9 +13459,8 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 11, - "interval": "1m", + "fill": 1, + "id": 16, "legend": { "avg": false, "current": false, @@ -12007,7 +13471,7 @@ items: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -12019,17 +13483,223 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, - "span": 6, - "stack": true, + "span": 12, + "stack": false, "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", - "format": "time_series", + "expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\", cluster=\"$cluster\",namespace=~\"$namespace\", pod=\"$pod\"}[5m]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", "step": 10 } ], @@ -12038,13 +13708,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", + "title": "Current Storage IO", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -12056,7 +13727,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -12078,7 +13749,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Rate of Packets Dropped", + "title": "Storage IO - Distribution", "titleSize": "h6" } ], diff --git a/manifests/kubernetes-prometheusRule.yaml b/manifests/kubernetes-prometheusRule.yaml index fda0691252da86329f373ee733abd90663b7e624..b3e966971aa00715fe8e80e293b464acdaefc646 100644 --- a/manifests/kubernetes-prometheusRule.yaml +++ b/manifests/kubernetes-prometheusRule.yaml @@ -214,19 +214,19 @@ spec: runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpareplicasmismatch summary: HPA has not matched descired number of replicas. expr: | - (kube_hpa_status_desired_replicas{job="kube-state-metrics"} + (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics"} != - kube_hpa_status_current_replicas{job="kube-state-metrics"}) + kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}) and - (kube_hpa_status_current_replicas{job="kube-state-metrics"} + (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"} > - kube_hpa_spec_min_replicas{job="kube-state-metrics"}) + kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics"}) and - (kube_hpa_status_current_replicas{job="kube-state-metrics"} + (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"} < - kube_hpa_spec_max_replicas{job="kube-state-metrics"}) + kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics"}) and - changes(kube_hpa_status_current_replicas{job="kube-state-metrics"}[15m]) == 0 + changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}[15m]) == 0 for: 15m labels: severity: warning @@ -236,9 +236,9 @@ spec: runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpamaxedout summary: HPA is running at max replicas expr: | - kube_hpa_status_current_replicas{job="kube-state-metrics"} + kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"} == - kube_hpa_spec_max_replicas{job="kube-state-metrics"} + kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics"} for: 15m labels: severity: warning