diff --git a/docs/developing-prometheus-rules-and-grafana-dashboards.md b/docs/developing-prometheus-rules-and-grafana-dashboards.md index 99cf271c03406f93b14805192cf5252899b912d7..e0dbf7528117e76de5481bdfd33ed5955381d7cc 100644 --- a/docs/developing-prometheus-rules-and-grafana-dashboards.md +++ b/docs/developing-prometheus-rules-and-grafana-dashboards.md @@ -252,30 +252,32 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { _config+:: { namespace: 'monitoring', }, - grafanaDashboards+:: { - 'my-dashboard.json': - dashboard.new('My Dashboard') - .addTemplate( - { - current: { - text: 'Prometheus', - value: 'Prometheus', + grafana+:: { + dashboards+:: { + 'my-dashboard.json': + dashboard.new('My Dashboard') + .addTemplate( + { + current: { + text: 'Prometheus', + value: 'Prometheus', + }, + hide: 0, + label: null, + name: 'datasource', + options: [], + query: 'prometheus', + refresh: 1, + regex: '', + type: 'datasource', }, - hide: 0, - label: null, - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, - ) - .addRow( - row.new() - .addPanel(graphPanel.new('My Panel', span=6, datasource='$datasource') - .addTarget(prometheus.target('vector(1)'))) - ), + ) + .addRow( + row.new() + .addPanel(graphPanel.new('My Panel', span=6, datasource='$datasource') + .addTarget(prometheus.target('vector(1)'))) + ), + }, }, }; @@ -298,9 +300,14 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { _config+:: { namespace: 'monitoring', }, - grafanaDashboards+:: { + grafanaDashboards+:: { // monitoring-mixin compatibility 'my-dashboard.json': (import 'example-grafana-dashboard.json'), }, + grafana+:: { + dashboards+:: { // use this method to import your dashboards to Grafana + 'my-dashboard.json': (import 'example-grafana-dashboard.json'), + }, + }, }; { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + @@ -319,8 +326,10 @@ local kp = 
(import 'kube-prometheus/kube-prometheus.libsonnet') + { _config+:: { namespace: 'monitoring', }, - rawGrafanaDashboards+:: { - 'my-dashboard.json': (importstr 'example-grafana-dashboard.json'), + grafana+:: { + rawDashboards+:: { + 'my-dashboard.json': (importstr 'example-grafana-dashboard.json'), + }, }, }; diff --git a/jsonnet/kube-prometheus/jsonnetfile.json b/jsonnet/kube-prometheus/jsonnetfile.json index c00bc857b2a725d61da3b4d59bdecbca10453c2d..50005ea29f0c19f1449f8ff4603fee600f9c7af0 100644 --- a/jsonnet/kube-prometheus/jsonnetfile.json +++ b/jsonnet/kube-prometheus/jsonnetfile.json @@ -26,7 +26,7 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "release-0.39" + "version": "release-0.40" }, { "source": { diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 6a8f854ed86b8237bb6a8639bb417ff987625376..4eb52890ff9438203938520c8a4e4d03dbfcc375 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -4,7 +4,7 @@ { "source": { "git": { - "remote": "https://github.com/brancz/kubernetes-grafana", + "remote": "https://github.com/brancz/kubernetes-grafana.git", "subdir": "grafana" } }, @@ -14,47 +14,47 @@ { "source": { "git": { - "remote": "https://github.com/coreos/etcd", + "remote": "https://github.com/coreos/etcd.git", "subdir": "Documentation/etcd-mixin" } }, - "version": "1166b1f195efae31439c7b3c913b4ef02e7df889", - "sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0=" + "version": "d8c8f903eee10b8391abaef7758c38b2cd393c55", + "sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398=" }, { "source": { "git": { - "remote": "https://github.com/coreos/prometheus-operator", + "remote": "https://github.com/coreos/prometheus-operator.git", "subdir": "jsonnet/prometheus-operator" } }, - "version": "d0a871b710de7b764c05ced98dbd1eb32a681790", - "sum": "cIOKRTNBUOl3a+QsaA/NjClmZAhyVJHlDFReKlXJBAs=" + "version": "e31c69f9b5c6555e0f4a5c1f39d0f03182dd6b41", + "sum": "WggWVWZ+CBEUThQCztSaRELbtqdXf9s3OFzf06HbYNA=" }, { "source": { "git": { - 
"remote": "https://github.com/grafana/grafonnet-lib", + "remote": "https://github.com/grafana/grafonnet-lib.git", "subdir": "grafonnet" } }, - "version": "906768d46973e022594d3f03d82c5a51d86de2cc", - "sum": "J3Vp0EVbxTObr6KydLXsi4Rc0ssNVAEuwLc0NQ+4wqU=" + "version": "8fb95bd89990e493a8534205ee636bfcb8db67bd", + "sum": "tDuuSKE9f4Ew2bjBM33Rs6behLEAzkmKkShSt+jpAak=" }, { "source": { "git": { - "remote": "https://github.com/grafana/jsonnet-libs", + "remote": "https://github.com/grafana/jsonnet-libs.git", "subdir": "grafana-builder" } }, - "version": "cb6bc2780a39afbbf9d4ee64fec8d1152023aee9", + "version": "881db2241f0c5007c3e831caf34b0c645202b4ab", "sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE=" }, { "source": { "git": { - "remote": "https://github.com/ksonnet/ksonnet-lib", + "remote": "https://github.com/ksonnet/ksonnet-lib.git", "subdir": "" } }, @@ -65,62 +65,62 @@ { "source": { "git": { - "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", "subdir": "" } }, - "version": "3cc34f995c31ed6e1e92024fed1912d63569c39f", - "sum": "r5Fg4KgiBtsFPCCHtM3Cb4CEgnizLyK97srDNAcjr+Y=" + "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b", + "sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY=" }, { "source": { "git": { - "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin", + "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", "subdir": "lib/promgrafonnet" } }, - "version": "3cc34f995c31ed6e1e92024fed1912d63569c39f", + "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b", "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc=" }, { "source": { "git": { - "remote": "https://github.com/kubernetes/kube-state-metrics", + "remote": "https://github.com/kubernetes/kube-state-metrics.git", "subdir": "jsonnet/kube-state-metrics" } }, - "version": "52fe3a268bd78c8f32a03361e28fdf23c41512c5", + "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7", 
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA=" }, { "source": { "git": { - "remote": "https://github.com/kubernetes/kube-state-metrics", + "remote": "https://github.com/kubernetes/kube-state-metrics.git", "subdir": "jsonnet/kube-state-metrics-mixin" } }, - "version": "52fe3a268bd78c8f32a03361e28fdf23c41512c5", - "sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU=" + "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7", + "sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8=" }, { "source": { "git": { - "remote": "https://github.com/prometheus/node_exporter", + "remote": "https://github.com/prometheus/node_exporter.git", "subdir": "docs/node-mixin" } }, - "version": "d4d2e1db98152ab6c94dc9a12a997950e0be2416", - "sum": "ZwrC0+4o1xD6+oPBu1p+rBXLlf6pMBD9rT8ygyl2aW0=" + "version": "08ce3c6dd430deb51798826701a395e460620d60", + "sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc=" }, { "source": { "git": { - "remote": "https://github.com/prometheus/prometheus", + "remote": "https://github.com/prometheus/prometheus.git", "subdir": "documentation/prometheus-mixin" } }, - "version": "209d4bb8a1491f4535cc6d991681e7dc03bb1d56", - "sum": "kRb3XBTe/AALDcaTFfyuiKqzhxtLvihBkVkvJ5cUd/I=", + "version": "8d3c2f6829d73be15a6684f9324917e72fbf1a31", + "sum": "lEzhZ8gllSfAO4kmXeTwl4W0anapIeFd5GCaCNuDe18=", "name": "prometheus" }, { diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index ca9ea0033078325dbf160ef17a7d322587188594..8033c7d295bde48f2ce6917e93a213799fbb801c 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -22,6 +22,24 @@ items: "id": null, "links": [ + ], + "panels": [ + { + "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", + "datasource": null, + "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes 
only.", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "mode": "markdown", + "span": 12, + "title": "Notice", + "type": "text" + } ], "refresh": "10s", "rows": [ @@ -40,6 +58,7 @@ items: ], "datasource": "$datasource", "decimals": 3, + "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?", "format": "percentunit", "gauge": { "maxValue": 100, @@ -51,7 +70,7 @@ items: "gridPos": { }, - "id": 2, + "id": 3, "interval": null, "links": [ @@ -99,7 +118,7 @@ items: } ], "thresholds": "", - "title": "Availability (30d) > 99.000", + "title": "Availability (30d) > 99.000%", "tooltip": { "shared": false }, @@ -123,11 +142,12 @@ items: "dashes": false, "datasource": "$datasource", "decimals": 3, - "fill": 1, + "description": "How much error budget is left looking at our 0.990% availability gurantees?", + "fill": 10, "gridPos": { }, - "id": 3, + "id": 4, "legend": { "alignAsTable": false, "avg": false, @@ -136,6 +156,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -171,7 +192,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "ErrorBudget (30d) > 99.000", + "title": "ErrorBudget (30d) > 99.000%", "tooltip": { "shared": false, "sort": 0, @@ -232,6 +253,7 @@ items: ], "datasource": "$datasource", "decimals": 3, + "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?", "format": "percentunit", "gauge": { "maxValue": 100, @@ -243,7 +265,7 @@ items: "gridPos": { }, - "id": 4, + "id": 5, "interval": null, "links": [ @@ -314,11 +336,12 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "description": "How many read requests (LIST,GET) per second do the apiservers get by code?", + "fill": 10, "gridPos": { }, - "id": 5, + "id": 6, "legend": { "alignAsTable": false, "avg": false, @@ -327,6 +350,7 @@ items: 
"min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -342,18 +366,33 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - + { + "alias": "/2../i", + "color": "#56A64B" + }, + { + "alias": "/3../i", + "color": "#F2CC0C" + }, + { + "alias": "/4../i", + "color": "#3274D9" + }, + { + "alias": "/5../i", + "color": "#E02F44" + } ], "spaceLength": 10, "span": 3, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(code_resource:apiserver_request_total:rate5m{verb=\"read\"})", + "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{ code }}", "refId": "A" } ], @@ -405,11 +444,12 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?", "fill": 1, "gridPos": { }, - "id": 6, + "id": 7, "legend": { "alignAsTable": false, "avg": false, @@ -418,6 +458,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -475,7 +516,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -483,7 +524,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -496,11 +537,12 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?", "fill": 1, "gridPos": { }, - "id": 7, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -509,6 +551,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -603,6 +646,7 @@ items: ], "datasource": "$datasource", "decimals": 3, + "description": "How many percent of write requests 
(POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?", "format": "percentunit", "gauge": { "maxValue": 100, @@ -614,7 +658,7 @@ items: "gridPos": { }, - "id": 8, + "id": 9, "interval": null, "links": [ @@ -685,11 +729,12 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?", + "fill": 10, "gridPos": { }, - "id": 9, + "id": 10, "legend": { "alignAsTable": false, "avg": false, @@ -698,6 +743,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -713,18 +759,33 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - + { + "alias": "/2../i", + "color": "#56A64B" + }, + { + "alias": "/3../i", + "color": "#F2CC0C" + }, + { + "alias": "/4../i", + "color": "#3274D9" + }, + { + "alias": "/5../i", + "color": "#E02F44" + } ], "spaceLength": 10, "span": 3, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(code_resource:apiserver_request_total:rate5m{verb=\"write\"})", + "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{ code }}", "refId": "A" } ], @@ -776,11 +837,12 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?", "fill": 1, "gridPos": { }, - "id": 10, + "id": 11, "legend": { "alignAsTable": false, "avg": false, @@ -789,6 +851,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -846,7 +909,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -854,7 +917,7 @@ items: "label": null, "logBase": 1, "max": null, - 
"min": null, + "min": 0, "show": true } ] @@ -867,11 +930,12 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?", "fill": 1, "gridPos": { }, - "id": 11, + "id": 12, "legend": { "alignAsTable": false, "avg": false, @@ -880,6 +944,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -963,90 +1028,6 @@ items: "collapse": false, "collapsed": false, "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 12, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(up{job=\"apiserver\", cluster=\"$cluster\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Up", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, { "aliasColors": { @@ -1060,222 +1041,6 @@ items: }, "id": 13, - "legend": 
{ - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 5, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"2..\", cluster=\"$cluster\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" - }, - { - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"3..\", cluster=\"$cluster\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"4..\", cluster=\"$cluster\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, - { - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"5..\", cluster=\"$cluster\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "RPC Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 
10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 14, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 5, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", verb!=\"WATCH\", cluster=\"$cluster\"}[5m])) by (verb, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{verb}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Request duration 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 15, "legend": { "alignAsTable": false, "avg": false, @@ -1284,6 +1049,7 @@ items: "min": false, "rightSide": false, "show": false, + "sideWidth": null, "total": false, "values": 
false }, @@ -1302,7 +1068,7 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ @@ -1366,7 +1132,7 @@ items: "gridPos": { }, - "id": 16, + "id": 14, "legend": { "alignAsTable": false, "avg": false, @@ -1375,6 +1141,7 @@ items: "min": false, "rightSide": false, "show": false, + "sideWidth": null, "total": false, "values": false }, @@ -1393,7 +1160,7 @@ items: ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ @@ -1457,7 +1224,7 @@ items: "gridPos": { }, - "id": 17, + "id": 15, "legend": { "alignAsTable": true, "avg": false, @@ -1466,6 +1233,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -1484,7 +1252,7 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 4, "stack": false, "steppedLine": false, "targets": [ @@ -1561,7 +1329,7 @@ items: "gridPos": { }, - "id": 18, + "id": 16, "legend": { "alignAsTable": false, "avg": false, @@ -1570,6 +1338,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -1652,7 +1421,7 @@ items: "gridPos": { }, - "id": 19, + "id": 17, "legend": { "alignAsTable": false, "avg": false, @@ -1661,6 +1430,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -1750,7 +1520,7 @@ items: "gridPos": { }, - "id": 20, + "id": 18, "legend": { "alignAsTable": false, "avg": false, @@ -1759,6 +1529,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -1861,7 +1632,7 @@ items: "gridPos": { }, - "id": 21, + "id": 19, "legend": { "alignAsTable": false, "avg": false, @@ -1870,6 +1641,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -1952,7 +1724,7 @@ items: "gridPos": { }, - "id": 22, + "id": 20, "legend": { "alignAsTable": 
false, "avg": false, @@ -1961,6 +1733,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -2043,7 +1816,7 @@ items: "gridPos": { }, - "id": 23, + "id": 21, "legend": { "alignAsTable": false, "avg": false, @@ -2052,6 +1825,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -2239,7 +2013,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / API server", "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b", "version": 0 @@ -2327,6 +2101,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -2428,6 +2203,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -2867,6 +2643,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -2968,6 +2745,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -3099,6 +2877,7 @@ items: "min": true, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -3198,6 +2977,7 @@ items: "min": true, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -3308,6 +3088,7 @@ items: "min": true, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -3407,6 +3188,7 @@ items: "min": true, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -3526,6 +3308,7 @@ items: "min": true, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -3625,6 +3408,7 @@ items: "min": true, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -3724,6 +3508,7 @@ items: "min": true, 
"rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -3827,6 +3612,7 @@ items: "min": true, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -4052,7 +3838,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Networking / Cluster", "uid": "ff635a025bcfea7bc3dd4f508990a3e9", "version": 0 @@ -4195,6 +3981,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -4299,6 +4086,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -4403,6 +4191,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -4507,6 +4296,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -4619,6 +4409,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -4723,6 +4514,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -4827,6 +4619,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -4918,6 +4711,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -5009,6 +4803,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -5169,7 +4964,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Controller Manager", "uid": "72e0e05bef5099e5f049b05fdc429ed4", "version": 0 @@ -5211,6 +5006,7 @@ items: "fill": 1, "format": "percentunit", "id": 1, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -6019,7 +5815,7 @@ items: ], "targets": [ { - "expr": 
"count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6438,7 +6234,7 @@ items: ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -6564,6 +6360,7 @@ items: "datasource": "$datasource", "fill": 1, "id": 11, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -7652,33 +7449,6 @@ items: "regex": "", "type": "datasource" }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(node_cpu_seconds_total, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, { "allValue": null, "current": { @@ -7737,7 +7507,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Compute Resources / Cluster", "uid": "efa86fd1d0c121a26444b636a3f509a8", "version": 0 @@ -9007,6 +8777,7 @@ items: "datasource": "$datasource", "fill": 1, "id": 9, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -9984,7 +9755,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Compute Resources / Namespace (Pods)", "uid": "85a562078cdf77779eaa1add43ccec1e", "version": 0 @@ -10053,7 +9824,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", 
"format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -10282,7 +10053,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10291,7 +10062,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10300,7 +10071,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10309,7 +10080,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10318,7 +10089,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": 
"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10418,7 +10189,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\", container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -10701,7 +10472,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10710,7 +10481,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10719,7 +10490,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=~\"$node\"}) by (pod)", "format": "table", "instant": true, 
"intervalFactor": 2, @@ -10728,7 +10499,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10737,7 +10508,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10746,7 +10517,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10755,7 +10526,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10764,7 +10535,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, 
"intervalFactor": 2, @@ -10883,7 +10654,7 @@ items: "hide": 0, "includeAll": false, "label": null, - "multi": false, + "multi": true, "name": "node", "options": [ @@ -10931,7 +10702,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Compute Resources / Node (Pods)", "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", "version": 0 @@ -11956,6 +11727,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 6, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -12054,6 +11826,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 7, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -12152,6 +11925,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 8, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -12250,6 +12024,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 9, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -12348,6 +12123,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 10, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -12446,6 +12222,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 11, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -12666,7 +12443,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Compute Resources / Pod", "uid": "6581e46e4e5c7ba40a07646395ef7b23", "version": 0 @@ -13437,6 +13214,7 @@ items: "datasource": "$datasource", "fill": 1, "id": 5, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -14664,7 +14442,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Compute Resources / Workload", "uid": "a164a7f0339f99e89cea5cb47e9be617", "version": 0 @@ -15595,6 +15373,7 @@ items: "datasource": "$datasource", "fill": 1, "id": 5, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -16621,7 +16400,7 @@ items: "steppedLine": false, "targets": [ { - "expr": 
"(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", @@ -16818,7 +16597,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Compute Resources / Namespace (Workloads)", "uid": "a87fb0d919ec0ea5f6543124e16c42a5", "version": 0 @@ -17394,6 +17173,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -17485,6 +17265,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -17589,6 +17370,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -17693,6 +17475,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -17791,6 +17574,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -17904,6 +17688,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -17997,6 +17782,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -18103,6 +17889,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -18207,6 
+17994,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -18298,6 +18086,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -18403,6 +18192,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -18494,6 +18284,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -18598,6 +18389,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -18702,6 +18494,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -18827,6 +18620,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -18931,6 +18725,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -19022,6 +18817,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -19113,6 +18909,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -19299,7 +19096,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Kubelet", "uid": "3138fa155d5915769fbded898ac09fd9", "version": 0 @@ -19927,6 +19724,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -20026,6 +19824,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -20136,6 +19935,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -20235,6 +20035,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": 
null, "total": false, "values": false }, @@ -20354,6 +20155,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -20453,6 +20255,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -20706,7 +20509,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Networking / Namespace (Pods)", "uid": "8b7a8b326d7a6f1f04244066368c67af", "version": 0 @@ -20794,6 +20597,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -20895,6 +20699,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -21334,6 +21139,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -21435,6 +21241,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -21566,6 +21373,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -21665,6 +21473,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -21775,6 +21584,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -21874,6 +21684,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -21993,6 +21804,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -22092,6 +21904,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -22377,7 +22190,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": 
"UTC", "title": "Kubernetes / Networking / Namespace (Workload)", "uid": "bbb2a765a623ae38130206c7d94a160f", "version": 0 @@ -23325,7 +23138,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "USE Method / Cluster", "uid": "3e97d1d02672cdd0861f4c97c64f89b2", "version": 0 @@ -24300,7 +24113,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "USE Method / Node", "uid": "fac67cfbe174d3ef53eb473d73d9212f", "version": 0 @@ -24359,6 +24172,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -24451,6 +24265,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -24576,6 +24391,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -24785,6 +24601,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -24900,6 +24717,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -25018,6 +24836,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -25110,6 +24929,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -25271,7 +25091,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Nodes", "uid": "fa49a4706d07a042595b664c87fb33ea", "version": 0 @@ -25330,6 +25150,7 @@ items: "min": true, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -25525,6 +25346,7 @@ items: "min": true, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -25828,7 +25650,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Persistent Volumes", "uid": "919b92a8e8041bd567af9edab12c840c", 
"version": 0 @@ -26191,6 +26013,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -26290,6 +26113,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -26400,6 +26224,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -26499,6 +26324,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -26618,6 +26444,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -26717,6 +26544,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27002,7 +26830,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Networking / Pod", "uid": "7a18067ce943a40ae25454675c19ff5c", "version": 0 @@ -27061,6 +26889,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27152,6 +26981,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27256,6 +27086,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27360,6 +27191,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27452,6 +27284,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27543,6 +27376,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27634,6 +27468,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27738,6 +27573,7 @@ items: "min": false, 
"rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27829,6 +27665,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -27933,6 +27770,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -28024,6 +27862,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -28128,6 +27967,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -28219,6 +28059,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -28310,6 +28151,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -28401,6 +28243,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -29975,6 +29818,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -30066,6 +29910,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -30170,6 +30015,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -30261,6 +30107,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -30365,6 +30212,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -30477,6 +30325,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -30581,6 +30430,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -30685,6 +30535,7 @@ 
items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -30776,6 +30627,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -30867,6 +30719,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -31027,7 +30880,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Proxy", "uid": "632e265de029684c40b21cb76bca4f94", "version": 0 @@ -31170,6 +31023,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -31282,6 +31136,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -31407,6 +31262,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -31519,6 +31375,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -31623,6 +31480,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -31727,6 +31585,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -31818,6 +31677,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -31909,6 +31769,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -32069,7 +31930,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Scheduler", "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", "version": 0 @@ -32740,6 +32601,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -32979,7 +32841,7 @@ items: "30d" ] }, - "timezone": "", + 
"timezone": "UTC", "title": "Kubernetes / StatefulSets", "uid": "a31c1f46e6f727cb37c0d731a7245005", "version": 0 @@ -33067,6 +32929,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -33168,6 +33031,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -33280,6 +33144,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -33381,6 +33246,7 @@ items: "min": false, "rightSide": true, "show": true, + "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, @@ -33512,6 +33378,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -33611,6 +33478,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -33721,6 +33589,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -33820,6 +33689,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -33939,6 +33809,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -34038,6 +33909,7 @@ items: "min": false, "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -34355,7 +34227,7 @@ items: "30d" ] }, - "timezone": "", + "timezone": "UTC", "title": "Kubernetes / Networking / Workload", "uid": "728bf77cc1166d2f3133bf25846876cc", "version": 0 diff --git a/manifests/prometheus-operator-serviceMonitor.yaml b/manifests/prometheus-operator-serviceMonitor.yaml index 3d7f7e572cbada5c4ec639d1f474a3b9320045fc..39e48aa775489eea6bd1daf3f1656328377bc4d7 100644 --- a/manifests/prometheus-operator-serviceMonitor.yaml +++ 
b/manifests/prometheus-operator-serviceMonitor.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.39.0 + app.kubernetes.io/version: v0.40.0 name: prometheus-operator namespace: monitoring spec: @@ -19,4 +19,4 @@ spec: matchLabels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.39.0 + app.kubernetes.io/version: v0.40.0 diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 34b445e801944d9ed9055ec0d5ee272fadb9a857..141fc8ab15568415515838a17c74ff00d1c35610 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -74,7 +74,7 @@ spec: sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d])) - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[1d])) + + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d])) ) @@ -95,7 +95,7 @@ spec: sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h])) - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[1h])) + + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h])) ) @@ -116,7 +116,7 @@ spec: 
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h])) - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[2h])) + + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h])) ) @@ -137,7 +137,7 @@ spec: sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m])) - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30m])) + + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m])) ) @@ -158,7 +158,7 @@ spec: sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d])) - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[3d])) + + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d])) ) @@ -179,7 +179,7 @@ spec: sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m])) - ( - 
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[5m])) + + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m])) ) @@ -200,7 +200,7 @@ spec: sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h])) - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[6h])) + + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) + sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h])) ) @@ -326,6 +326,51 @@ spec: labels: verb: write record: apiserver_request:burnrate6h + - expr: | + sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + record: code_resource:apiserver_request_total:rate5m + - expr: | + sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + record: code_resource:apiserver_request_total:rate5m + - expr: | + histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0 + labels: + quantile: "0.99" + verb: read + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + - expr: | + histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 + labels: 
+ quantile: "0.99" + verb: write + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + - expr: | + sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod) + / + sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod) + record: cluster:apiserver_request_duration_seconds:mean5m + - expr: | + histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) + labels: + quantile: "0.99" + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) + labels: + quantile: "0.9" + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) + labels: + quantile: "0.5" + record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + - interval: 3m + name: kube-apiserver-availability.rules + rules: - expr: | 1 - ( ( @@ -339,13 +384,13 @@ spec: sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d])) - ( - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="resource",le="0.1"}[30d])) + + sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) + 
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) + sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d])) ) ) + # errors - sum(code:apiserver_request_total:increase30d{code=~"5.."}) + sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) ) / sum(code:apiserver_request_total:increase30d) @@ -358,13 +403,13 @@ spec: - ( # too slow - sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30d])) + + sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) + sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) + sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d])) ) + # errors - sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."}) + sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) ) / sum(code:apiserver_request_total:increase30d{verb="read"}) @@ -381,7 +426,7 @@ spec: ) + # errors - sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."}) + sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) ) / sum(code:apiserver_request_total:increase30d{verb="write"}) @@ -389,60 +434,87 @@ spec: verb: write record: apiserver_request:availability30d - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver"}[30d])) + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d])) record: code_verb:apiserver_request_total:increase30d - expr: | - sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) - labels: - verb: read - record: code:apiserver_request_total:increase30d + sum by (code, verb) 
(increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d])) + record: code_verb:apiserver_request_total:increase30d - expr: | - sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - labels: - verb: write - record: code:apiserver_request_total:increase30d + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d])) + record: code_verb:apiserver_request_total:increase30d - expr: | - sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) - labels: - verb: read - record: code_resource:apiserver_request_total:rate5m + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d])) + record: code_verb:apiserver_request_total:increase30d - expr: | - sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - labels: - verb: write - record: code_resource:apiserver_request_total:rate5m + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d])) + record: code_verb:apiserver_request_total:increase30d - expr: | - histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0 - labels: - quantile: "0.99" - verb: read - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d])) + record: code_verb:apiserver_request_total:increase30d - expr: | - histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 - labels: - quantile: "0.99" - verb: write - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + sum by (code, verb) 
(increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d])) + record: code_verb:apiserver_request_total:increase30d - expr: | - sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod) - / - sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod) - record: cluster:apiserver_request_duration_seconds:mean5m + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d])) + record: code_verb:apiserver_request_total:increase30d - expr: | - histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) - labels: - quantile: "0.99" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d])) + record: code_verb:apiserver_request_total:increase30d - expr: | - histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) 
(increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d])) + record: code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d])) + record: 
code_verb:apiserver_request_total:increase30d + - expr: | + sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) labels: - quantile: "0.9" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + verb: read + record: code:apiserver_request_total:increase30d - expr: | - histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) + sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) labels: - quantile: "0.5" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile + verb: write + record: code:apiserver_request_total:increase30d - name: k8s.rules rules: - expr: | @@ -452,31 +524,31 @@ spec: sum by (cluster, namespace, pod, container) ( rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( - 1, max by(cluster, namespace, pod, node) (kube_pod_info) + 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate - expr: | container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, - max by(namespace, pod, node) (kube_pod_info) + max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_working_set_bytes - expr: | container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, - max by(namespace, pod, node) (kube_pod_info) + max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_rss - expr: | 
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, - max by(namespace, pod, node) (kube_pod_info) + max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_cache - expr: | container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, - max by(namespace, pod, node) (kube_pod_info) + max by(namespace, pod, node) (kube_pod_info{node!=""}) ) record: node_namespace_pod_container:container_memory_swap - expr: | @@ -591,12 +663,12 @@ spec: - name: node.rules rules: - expr: | - sum(min(kube_pod_info) by (cluster, node)) + sum(min(kube_pod_info{node!=""}) by (cluster, node)) record: ':kube_pod_info_node_count:' - expr: | topk by(namespace, pod) (1, max by (node, namespace, pod) ( - label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)") + label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") )) record: 'node_namespace_pod:kube_pod_info:' - expr: | @@ -849,13 +921,22 @@ spec: severity: warning - alert: NodeHighNumberConntrackEntriesUsed annotations: - description: '{{ $value | humanizePercentage }} of conntrack entries are used' + description: '{{ $value | humanizePercentage }} of conntrack entries are used.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused - summary: Number of conntrack are getting close to the limit + summary: Number of conntrack are getting close to the limit. expr: | (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 labels: severity: warning + - alert: NodeTextFileCollectorScrapeError + annotations: + description: Node Exporter text file collector failed to scrape. 
+ runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror + summary: Node Exporter text file collector failed to scrape. + expr: | + node_textfile_scrape_error{job="node-exporter"} == 1 + labels: + severity: warning - alert: NodeClockSkewDetected annotations: message: Clock on {{ $labels.instance }} is out of sync by more than 300s. @@ -896,20 +977,26 @@ spec: }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping expr: | - rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0 + rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0 for: 15m labels: - severity: critical + severity: warning - alert: KubePodNotReady annotations: message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes. 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready expr: | - sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})) > 0 + sum by (namespace, pod) ( + max by(namespace, pod) ( + kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"} + ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) ( + 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"}) + ) + ) > 0 for: 15m labels: - severity: critical + severity: warning - alert: KubeDeploymentGenerationMismatch annotations: message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment @@ -922,7 +1009,7 @@ spec: kube_deployment_metadata_generation{job="kube-state-metrics"} for: 15m labels: - severity: critical + severity: warning - alert: KubeDeploymentReplicasMismatch annotations: message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not @@ -940,7 +1027,7 @@ spec: ) for: 15m labels: - severity: critical + severity: warning - alert: KubeStatefulSetReplicasMismatch annotations: message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has @@ -958,7 +1045,7 @@ spec: ) for: 15m labels: - severity: critical + severity: warning - alert: KubeStatefulSetGenerationMismatch annotations: message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset @@ -971,27 +1058,33 @@ spec: kube_statefulset_metadata_generation{job="kube-state-metrics"} for: 15m labels: - severity: critical + severity: warning - alert: KubeStatefulSetUpdateNotRolledOut annotations: message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out. 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout expr: | - max without (revision) ( - kube_statefulset_status_current_revision{job="kube-state-metrics"} - unless - kube_statefulset_status_update_revision{job="kube-state-metrics"} - ) - * ( - kube_statefulset_replicas{job="kube-state-metrics"} - != - kube_statefulset_status_replicas_updated{job="kube-state-metrics"} + max without (revision) ( + kube_statefulset_status_current_revision{job="kube-state-metrics"} + unless + kube_statefulset_status_update_revision{job="kube-state-metrics"} + ) + * + ( + kube_statefulset_replicas{job="kube-state-metrics"} + != + kube_statefulset_status_replicas_updated{job="kube-state-metrics"} + ) + ) and ( + changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m]) + == + 0 ) for: 15m labels: - severity: critical + severity: warning - alert: KubeDaemonSetRolloutStuck annotations: message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet @@ -1003,7 +1096,7 @@ spec: kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00 for: 15m labels: - severity: critical + severity: warning - alert: KubeContainerWaiting annotations: message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} @@ -1254,7 +1347,9 @@ spec: sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) for: 2m labels: + long: 1h severity: critical + short: 5m - alert: KubeAPIErrorBudgetBurn annotations: message: The API server is burning too much error budget @@ -1265,7 +1360,9 @@ spec: sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) for: 15m labels: + long: 6h severity: critical + short: 30m - alert: KubeAPIErrorBudgetBurn annotations: message: The API server is burning too much error budget @@ -1276,7 +1373,9 @@ spec: sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) for: 1h labels: + long: 1d severity: warning + short: 2h - alert: 
KubeAPIErrorBudgetBurn annotations: message: The API server is burning too much error budget @@ -1287,7 +1386,9 @@ spec: sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) for: 3h labels: + long: 3d severity: warning + short: 6h - name: kubernetes-system-apiserver rules: - alert: KubeAPILatencyHigh @@ -1296,6 +1397,10 @@ spec: {{ $labels.verb }} {{ $labels.resource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh expr: | + cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} + > + 1 + and on (verb,resource) ( cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} > @@ -1307,10 +1412,6 @@ spec: ) ) > on (verb) group_left() 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) - and on (verb,resource) - cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} - > - 1 for: 5m labels: severity: warning @@ -1391,8 +1492,7 @@ spec: message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable expr: | - kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1 - for: 2m + (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key="ToBeDeletedByClusterAutoscaler"}) == 1 labels: severity: warning - alert: KubeletTooManyPods diff --git a/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml index d391b6b4e2f763d6edb1b7b1d9f198f71b38ec10..3c2126c834b4ec8d49c80ea197ebe65ef53af392 100644 --- a/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml @@ -2177,6 +2177,15 @@ spec: of origin for each alert and metric that is user created. The label value will always be the namespace of the object that is being created. type: string + enforcedSampleLimit: + description: EnforcedSampleLimit defines global limit on number of + scraped samples that will be accepted. This overrides any SampleLimit + set per ServiceMonitor or/and PodMonitor. It is meant to be used + by admins to enforce the SampleLimit to keep overall number of samples/series + under the desired limit. Note that if SampleLimit is lower that + value will be taken instead. + format: int64 + type: integer evaluationInterval: description: Interval between consecutive evaluations. type: string @@ -3428,6 +3437,27 @@ spec: instance name. Defaults to the value of `prometheus`. External label will _not_ be added when value is set to empty string (`""`). 
type: string + prometheusRulesExcludedFromEnforce: + description: PrometheusRulesExcludedFromEnforce - list of prometheus + rules to be excluded from enforcing of adding namespace labels. + Works only if enforcedNamespaceLabel set to true. Make sure both + ruleNamespace and ruleName are set for each pair + items: + description: PrometheusRuleExcludeConfig enables users to configure + excluded PrometheusRule names and their namespaces to be ignored + while enforcing namespace label for alerts and metrics. + properties: + ruleName: + description: RuleName - name of excluded rule + type: string + ruleNamespace: + description: RuleNamespace - namespace of excluded rule + type: string + required: + - ruleName + - ruleNamespace + type: object + type: array query: description: QuerySpec defines the query command line flags when starting Prometheus. @@ -4114,6 +4144,10 @@ spec: scrapeInterval: description: Interval between consecutive scrapes. type: string + scrapeTimeout: + description: Number of seconds to wait for target to respond before + erroring. + type: string secrets: description: Secrets is a list of Secrets in the same namespace as the Prometheus object, which shall be mounted into the Prometheus @@ -4762,6 +4796,12 @@ spec: logLevel: description: LogLevel for Thanos sidecar to be configured with. type: string + minTime: + description: MinTime for Thanos sidecar to be configured with. + Option can be a constant time in RFC3339 format or time duration + relative to current time, such as -1d or 2h45m. Valid duration + units are ms, s, m, h, d, w, y. + type: string objectStorageConfig: description: ObjectStorageConfig configures object storage in Thanos.
diff --git a/manifests/setup/prometheus-operator-0thanosrulerCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0thanosrulerCustomResourceDefinition.yaml index 3396c98885617a43fd3477dd1f1c67c4d537a32b..74db7b3a8e4206672cf367e21b494b836a47ee66 100644 --- a/manifests/setup/prometheus-operator-0thanosrulerCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0thanosrulerCustomResourceDefinition.yaml @@ -2998,6 +2998,27 @@ spec: priorityClassName: description: Priority class assigned to the Pods type: string + prometheusRulesExcludedFromEnforce: + description: PrometheusRulesExcludedFromEnforce - list of Prometheus + rules to be excluded from enforcing of adding namespace labels. + Works only if enforcedNamespaceLabel set to true. Make sure both + ruleNamespace and ruleName are set for each pair + items: + description: PrometheusRuleExcludeConfig enables users to configure + excluded PrometheusRule names and their namespaces to be ignored + while enforcing namespace label for alerts and metrics. + properties: + ruleName: + description: RuleName - name of excluded rule + type: string + ruleNamespace: + description: RuleNamespace - namespace of excluded rule + type: string + required: + - ruleName + - ruleNamespace + type: object + type: array queryConfig: description: Define configuration for connecting to thanos query instances. If this is defined, the QueryEndpoints field will be ignored.
Maps diff --git a/manifests/setup/prometheus-operator-clusterRole.yaml b/manifests/setup/prometheus-operator-clusterRole.yaml index 6afaa774935ff4ded6bf773ddd4cd83d9961c9d4..b3d490a797638d31def47bd34f7d653b7575315d 100644 --- a/manifests/setup/prometheus-operator-clusterRole.yaml +++ b/manifests/setup/prometheus-operator-clusterRole.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.39.0 + app.kubernetes.io/version: v0.40.0 name: prometheus-operator rules: - apiGroups: diff --git a/manifests/setup/prometheus-operator-clusterRoleBinding.yaml b/manifests/setup/prometheus-operator-clusterRoleBinding.yaml index c493a746c20718ed5629c5033289d8f01cc1aeaf..5ac1066ff657e96d015e1e8e008627b72812cf3d 100644 --- a/manifests/setup/prometheus-operator-clusterRoleBinding.yaml +++ b/manifests/setup/prometheus-operator-clusterRoleBinding.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.39.0 + app.kubernetes.io/version: v0.40.0 name: prometheus-operator roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/manifests/setup/prometheus-operator-deployment.yaml b/manifests/setup/prometheus-operator-deployment.yaml index 1bfb05c4c56961f5f844cd60577ed53856882f58..c00043183a8667004a69d390e299e601b9e7e7a2 100644 --- a/manifests/setup/prometheus-operator-deployment.yaml +++ b/manifests/setup/prometheus-operator-deployment.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.39.0 + app.kubernetes.io/version: v0.40.0 name: prometheus-operator namespace: monitoring spec: @@ -18,15 +18,15 @@ spec: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.39.0 + app.kubernetes.io/version: v0.40.0 spec: containers: - args: 
- --kubelet-service=kube-system/kubelet - --logtostderr=true - --config-reloader-image=jimmidyson/configmap-reload:v0.3.0 - - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.39.0 - image: quay.io/coreos/prometheus-operator:v0.39.0 + - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.40.0 + image: quay.io/coreos/prometheus-operator:v0.40.0 name: prometheus-operator ports: - containerPort: 8080 diff --git a/manifests/setup/prometheus-operator-service.yaml b/manifests/setup/prometheus-operator-service.yaml index c3c3debb2cfbf7aa4b123931fd2d5c59116cf0d7..3f8dd96e04332ed17f61331d1d27c87b0a96c345 100644 --- a/manifests/setup/prometheus-operator-service.yaml +++ b/manifests/setup/prometheus-operator-service.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.39.0 + app.kubernetes.io/version: v0.40.0 name: prometheus-operator namespace: monitoring spec: diff --git a/manifests/setup/prometheus-operator-serviceAccount.yaml b/manifests/setup/prometheus-operator-serviceAccount.yaml index 39d14d95e3e285a320654ab69d25199767178b9a..c0bffc96cd258c3857773b0e6bc695966dfe9b71 100644 --- a/manifests/setup/prometheus-operator-serviceAccount.yaml +++ b/manifests/setup/prometheus-operator-serviceAccount.yaml @@ -4,6 +4,6 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.39.0 + app.kubernetes.io/version: v0.40.0 name: prometheus-operator namespace: monitoring