From 630e75e6c07c89f69d0e2f60083bc3dca1a3f983 Mon Sep 17 00:00:00 2001 From: Prometheus Operator Bot <prom-op-bot@users.noreply.github.com> Date: Mon, 10 Jul 2023 07:41:19 +0000 Subject: [PATCH] [bot] [main] Automated version update --- jsonnetfile.lock.json | 28 +- manifests/grafana-dashboardDefinitions.yaml | 651 ++++++------------ manifests/nodeExporter-prometheusRule.yaml | 105 ++- ...managerConfigCustomResourceDefinition.yaml | 289 ++++++++ ...0scrapeconfigCustomResourceDefinition.yaml | 230 +++++++ 5 files changed, 837 insertions(+), 466 deletions(-) diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 151f6429..6763ce5f 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -18,7 +18,7 @@ "subdir": "contrib/mixin" } }, - "version": "739239f131ddef625902a93746883902629d542d", + "version": "82f6cb4635fe7db3e74b6e332b7078cdfed7b9c1", "sum": "GdePvMDfLQcVhwzk/Ephi/jC27ywGObLB5t0eC0lXd4=" }, { @@ -58,8 +58,8 @@ "subdir": "grafana-builder" } }, - "version": "48da1834254f19d592a33ccfee18159af96be6f3", - "sum": "wp/L/9smcsHIiy24DH5WWMv2fcSckN2Lw/m7qDszaWU=" + "version": "f33dff93db677a32303630c3e0910cf6d46a92cc", + "sum": "RJjFQa1n8CDbB6m1fBQQzPye7jhOhGTUbma27Gil81I=" }, { "source": { @@ -78,7 +78,7 @@ "subdir": "jsonnet/kube-state-metrics" } }, - "version": "3a5c76d8b8d489b8694b2c4cd30256616ef06e33", + "version": "bb6e9f42f8bac32ed6e50b6932cb2ab7fc9307ef", "sum": "+dOzAK+fwsFf97uZpjcjTcEJEC1H8hh/j8f5uIQK/5g=" }, { @@ -88,7 +88,7 @@ "subdir": "jsonnet/kube-state-metrics-mixin" } }, - "version": "3a5c76d8b8d489b8694b2c4cd30256616ef06e33", + "version": "bb6e9f42f8bac32ed6e50b6932cb2ab7fc9307ef", "sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c=" }, { @@ -98,7 +98,7 @@ "subdir": "jsonnet/mixin" } }, - "version": "eb5594b6d79b56ac4334ef7ebbe8f9e6e2118822", + "version": "5457c2546cf16e6ab73a1646d09fd990febc7583", "sum": "n3flMIzlADeyygb0uipZ4KPp2uNSjdtkrwgHjTC7Ca4=", "name": "prometheus-operator-mixin" }, @@ -109,8 +109,8 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "eb5594b6d79b56ac4334ef7ebbe8f9e6e2118822", - "sum": "7WhWigpP6FgMuWd54jzWP/XV1jGhWHBE4Jew8Dx8FmI=" + "version": "5457c2546cf16e6ab73a1646d09fd990febc7583", + "sum": "9aENv3HT5qI9RuHh5tGpmQHx2r8YP4Vog5U+ggLg5f0=" }, { "source": { @@ -119,7 +119,7 @@ "subdir": "doc/alertmanager-mixin" } }, - "version": "a85979e19d24490322d5ce342301d17b0f13dcc5", + "version": "487db1383b8cc5c2867c77f110431605bb8ce247", "sum": "PsK+V7oETCPKu2gLoPfqY0wwPKH9TzhNj6o2xezjjXc=", "name": "alertmanager" }, @@ -130,8 +130,8 @@ "subdir": "docs/node-mixin" } }, - "version": "a11de2ede5b92809fa054066901572d26b8deefc", - "sum": "aFUI56y6Y8EpniS4cfYqrSaHFnxeomIw4S4+Sz8yPtQ=" + "version": "f4344579d51b7058d07b6697768b6aef4d0fee50", + "sum": "By6n6U10hYDogUsyhsaKZehbhzxBZZobJloiKyKadgM=" }, { "source": { @@ -140,7 +140,7 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "446dff01ea1368ae631d9e96611255b6c98b7d82", + "version": "26c354de0b46d14f922f5d37c24513596dff56b0", "sum": "WkRzFpnseUc/Ev8I2QBLxAC4vkPwLHeOGUw5QemCsMU=", "name": "prometheus" }, @@ -151,7 +151,7 @@ "subdir": "config/crd/bases" } }, - "version": "0cc188220949f081ce85f97894ca90c0b24861df", + "version": "afdc0cf501eb0ce9a4ca3551f50538ddd669c391", "sum": "MK8+uumteRncS0hkyjocvU2vdtlGbfBRPcU0/mJnU2M=" }, { @@ -161,7 +161,7 @@ "subdir": "mixin" } }, - "version": "5d695e9226e4360c450a2dbf3076f79835829dd1", + "version": "ca308b0d51a07987671804b4e06487f3b4173b51", "sum": "WhheqsiX0maUXByZFsb9xhCEsGXK2955bPmPPf1x+Cs=", "name": "thanos-mixin" }, diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 3cd8cda6..4868f5a0 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -6640,8 +6640,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -6917,8 +6916,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", @@ -6926,8 +6924,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", @@ -6935,8 +6932,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", @@ -6944,8 +6940,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", @@ -6953,8 +6948,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", @@ -6962,8 +6956,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", @@ -6971,8 +6964,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "G", - "step": 10 + "refId": "G" } ], "thresholds": [ @@ -7073,8 +7065,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -7350,8 +7341,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", @@ -7359,8 +7349,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", @@ -7368,8 +7357,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", @@ -7377,8 +7365,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", @@ -7386,8 +7373,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", @@ -7395,8 +7381,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", @@ -7404,8 +7389,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "G", - "step": 10 + "refId": "G" } ], "thresholds": [ @@ -7663,8 +7647,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", @@ -7672,8 +7655,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", @@ -7681,8 +7663,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", @@ -7690,8 +7671,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", @@ -7699,8 +7679,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", @@ -7708,8 +7687,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -7810,8 +7788,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -7899,8 +7876,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8000,8 +7976,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8089,8 +8064,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8190,8 +8164,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8279,8 +8252,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8380,8 +8352,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8469,8 +8440,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8571,8 +8541,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8660,8 +8629,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -8922,8 +8890,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", @@ -8931,8 +8898,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", @@ -8940,8 +8906,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", @@ -8949,8 +8914,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", @@ -8958,8 +8922,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", @@ -8967,8 +8930,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -9717,8 +9679,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -9956,8 +9917,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)", @@ -9965,8 +9925,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)", @@ -9974,8 +9933,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)", @@ -9983,8 +9941,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)", @@ -9992,8 +9949,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" } ], "thresholds": [ @@ -10094,8 +10050,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -10333,8 +10288,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)", @@ -10342,8 +10296,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)", @@ -10351,8 +10304,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)", @@ -10360,8 +10312,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)", @@ -10369,8 +10320,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" } ], "thresholds": [ @@ -10939,24 +10889,21 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -11194,8 +11141,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", @@ -11203,8 +11149,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", @@ -11212,8 +11157,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", @@ -11221,8 +11165,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", @@ -11230,8 +11173,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" } ], "thresholds": [ @@ -11353,24 +11295,21 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -11665,8 +11604,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", @@ -11674,8 +11612,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", @@ -11683,8 +11620,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", @@ -11692,8 +11628,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", @@ -11701,8 +11636,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", @@ -11710,8 +11644,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" }, { "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", @@ -11719,8 +11652,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "G", - "step": 10 + "refId": "G" }, { "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", @@ -11728,8 +11660,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "H", - "step": 10 + "refId": "H" } ], "thresholds": [ @@ -11987,8 +11918,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", @@ -11996,8 +11926,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", @@ -12005,8 +11934,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", @@ -12014,8 +11942,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", @@ -12023,8 +11950,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", @@ -12032,8 +11958,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -12134,8 +12059,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -12223,8 +12147,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -12324,8 +12247,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -12413,8 +12335,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -12514,8 +12435,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -12603,8 +12523,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -12705,8 +12624,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -12794,8 +12712,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -13056,8 +12973,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", @@ -13065,8 +12981,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", @@ -13074,8 +12989,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", @@ -13083,8 +12997,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", @@ -13092,8 +13005,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", @@ -13101,8 +13013,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -13354,16 +13265,14 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "max capacity", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -13601,8 +13510,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", @@ -13610,8 +13518,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", @@ -13619,8 +13526,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", @@ -13628,8 +13534,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", @@ -13637,8 +13542,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" } ], "thresholds": [ @@ -13749,16 +13653,14 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "max capacity", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -14053,8 +13955,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", @@ -14062,8 +13963,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", @@ -14071,8 +13971,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", @@ -14080,8 +13979,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", @@ -14089,8 +13987,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", @@ -14098,8 +13995,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" }, { "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", @@ -14107,8 +14003,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "G", - "step": 10 + "refId": "G" }, { "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", @@ -14116,8 +14011,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "H", - "step": 10 + "refId": "H" } ], "thresholds": [ @@ -14376,24 +14270,21 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "requests", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "limits", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -14493,8 +14384,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -14739,8 +14629,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", @@ -14748,8 +14637,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", @@ -14757,8 +14645,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", @@ -14766,8 +14653,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", @@ -14775,8 +14661,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" } ], "thresholds": [ @@ -14896,24 +14781,21 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "requests", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "limits", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -15208,8 +15090,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", @@ -15217,8 +15098,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", @@ -15226,8 +15106,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", @@ -15235,8 +15114,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", @@ -15244,8 +15122,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", @@ -15253,8 +15130,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" }, { "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", @@ -15262,8 +15138,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "G", - "step": 10 + "refId": "G" }, { "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", @@ -15271,8 +15146,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "H", - "step": 10 + "refId": "H" } ], "thresholds": [ @@ -15373,8 +15247,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -15462,8 +15335,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -15563,8 +15435,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -15652,8 +15523,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -15753,8 +15623,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -15842,8 +15711,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -15944,16 +15812,14 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "Reads", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Writes", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -16041,16 +15907,14 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "Reads", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Writes", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -16151,8 +16015,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -16240,8 +16103,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -16502,8 +16364,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", @@ -16511,8 +16372,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", @@ -16520,8 +16380,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", @@ -16529,8 +16388,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", @@ -16538,8 +16396,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", @@ -16547,8 +16404,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -16817,8 +16673,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -17056,8 +16911,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", @@ -17065,8 +16919,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", @@ -17074,8 +16927,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", @@ -17083,8 +16935,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", @@ -17092,8 +16943,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" } ], "thresholds": [ @@ -17194,8 +17044,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -17433,8 +17282,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", @@ -17442,8 +17290,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", @@ -17451,8 +17298,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", @@ -17460,8 +17306,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", @@ -17469,8 +17314,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" } ], "thresholds": [ @@ -17728,8 +17572,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", @@ -17737,8 +17580,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", @@ -17746,8 +17588,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", @@ -17755,8 +17596,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", @@ -17764,8 +17604,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", @@ -17773,8 +17612,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -17875,8 +17713,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -17964,8 +17801,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -18065,8 +17901,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -18154,8 +17989,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -18255,8 +18089,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -18344,8 +18177,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -18445,8 +18277,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -18534,8 +18365,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -18851,24 +18681,21 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -19144,8 +18971,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19153,8 +18979,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19162,8 +18987,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19171,8 +18995,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19180,8 +19003,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19189,8 +19011,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -19312,24 +19133,21 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -19605,8 +19423,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19614,8 +19431,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19623,8 +19439,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19632,8 +19447,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19641,8 +19455,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", @@ -19650,8 +19463,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -19928,8 +19740,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", @@ -19937,8 +19748,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" }, { "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", @@ -19946,8 +19756,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "C", - "step": 10 + "refId": "C" }, { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", @@ -19955,8 +19764,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 10 + "refId": "D" }, { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", @@ -19964,8 +19772,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "E", - "step": 10 + "refId": "E" }, { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", @@ -19973,8 +19780,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "F", - "step": 10 + "refId": "F" } ], "thresholds": [ @@ -20075,8 +19881,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -20164,8 +19969,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -20265,8 +20069,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -20354,8 +20157,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -20455,8 +20257,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -20544,8 +20345,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -20645,8 +20445,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -20734,8 +20533,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -34244,8 +34042,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "A", - "step": 10 + "refId": "A" }, { "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})", @@ -34253,8 +34050,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", - "refId": "B", - "step": 10 + "refId": "B" } ], "thresholds": [ @@ -34352,8 +34148,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{scrape_job}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -34438,8 +34233,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "Targets", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -34536,8 +34330,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{interval}} configured", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -34622,40 +34415,35 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "exceeded body size limit: {{job}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "exceeded sample limit: {{job}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "duplicate timestamp: {{job}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "out of bounds: {{job}}", - "legendLink": null, - "step": 10 + "legendLink": null }, { "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "out of order: {{job}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -34740,8 +34528,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{instance}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -34838,8 +34625,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{instance}} head series", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -34924,8 +34710,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{instance}} head chunks", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -35022,8 +34807,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{instance}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ @@ -35108,8 +34892,7 @@ items: "format": "time_series", "intervalFactor": 2, "legendFormat": "{{slice}}", - "legendLink": null, - "step": 10 + "legendLink": null } ], "thresholds": [ diff --git a/manifests/nodeExporter-prometheusRule.yaml b/manifests/nodeExporter-prometheusRule.yaml index 839cc89d..f6b32735 100644 --- a/manifests/nodeExporter-prometheusRule.yaml +++ b/manifests/nodeExporter-prometheusRule.yaml @@ -16,7 +16,7 @@ spec: rules: - alert: NodeFilesystemSpaceFillingUp annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup summary: Filesystem is predicted to run out of space within the next 24 hours. expr: | @@ -32,7 +32,7 @@ spec: severity: warning - alert: NodeFilesystemSpaceFillingUp annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup summary: Filesystem is predicted to run out of space within the next 4 hours. expr: | @@ -48,7 +48,7 @@ spec: severity: critical - alert: NodeFilesystemAlmostOutOfSpace annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace summary: Filesystem has less than 5% space left. expr: | @@ -62,7 +62,7 @@ spec: severity: warning - alert: NodeFilesystemAlmostOutOfSpace annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace summary: Filesystem has less than 3% space left. expr: | @@ -76,7 +76,7 @@ spec: severity: critical - alert: NodeFilesystemFilesFillingUp annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup summary: Filesystem is predicted to run out of inodes within the next 24 hours. expr: | @@ -92,7 +92,7 @@ spec: severity: warning - alert: NodeFilesystemFilesFillingUp annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup summary: Filesystem is predicted to run out of inodes within the next 4 hours. expr: | @@ -108,7 +108,7 @@ spec: severity: critical - alert: NodeFilesystemAlmostOutOfFiles annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles summary: Filesystem has less than 5% inodes left. expr: | @@ -122,7 +122,7 @@ spec: severity: warning - alert: NodeFilesystemAlmostOutOfFiles annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles summary: Filesystem has less than 3% inodes left. expr: | @@ -140,7 +140,7 @@ spec: runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs summary: Network interface is reporting many receive errors. expr: | - rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01 + rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) > 0.01 for: 1h labels: severity: warning @@ -150,7 +150,7 @@ spec: runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs summary: Network interface is reporting many transmit errors. expr: | - rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01 + rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) > 0.01 for: 1h labels: severity: warning @@ -160,12 +160,12 @@ spec: runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused summary: Number of conntrack are getting close to the limit. expr: | - (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 + (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75 labels: severity: warning - alert: NodeTextFileCollectorScrapeError annotations: - description: Node Exporter text file collector failed to scrape. + description: Node Exporter text file collector on {{ $labels.instance }} failed to scrape. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror summary: Node Exporter text file collector failed to scrape. expr: | @@ -174,7 +174,7 @@ spec: severity: warning - alert: NodeClockSkewDetected annotations: - description: Clock on {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. + description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected summary: Clock skew detected. expr: | @@ -194,7 +194,7 @@ spec: severity: warning - alert: NodeClockNotSynchronising annotations: - description: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. + description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising summary: Clock not synchronising. expr: | @@ -206,9 +206,9 @@ spec: severity: warning - alert: NodeRAIDDegraded annotations: - description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. + description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded - summary: RAID Array is degraded + summary: RAID Array is degraded. expr: | node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 for: 15m @@ -216,9 +216,9 @@ spec: severity: critical - alert: NodeRAIDDiskFailure annotations: - description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap. + description: At least one device in RAID array at {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure - summary: Failed device in RAID array + summary: Failed device in RAID array. expr: | node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 labels: @@ -247,6 +247,75 @@ spec: for: 15m labels: severity: critical + - alert: NodeCPUHighUsage + annotations: + description: | + CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage + summary: High CPU usage. + expr: | + sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!="idle"}[2m]))) * 100 > 90 + for: 15m + labels: + severity: info + - alert: NodeSystemSaturation + annotations: + description: | + System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + This might indicate this instance resources saturation and can cause it becoming unresponsive. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation + summary: System saturated, load per core is very high. + expr: | + node_load1{job="node-exporter"} + / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2 + for: 15m + labels: + severity: warning + - alert: NodeMemoryMajorPagesFaults + annotations: + description: | + Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + Please check that there is enough memory available at this instance. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults + summary: Memory major page faults are occurring at very high rate. + expr: | + rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500 + for: 15m + labels: + severity: warning + - alert: NodeMemoryHighUtilization + annotations: + description: | + Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization + summary: Host is running out of memory. + expr: | + 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 + for: 15m + labels: + severity: warning + - alert: NodeDiskIOSaturation + annotations: + description: | + Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + This symptom might indicate disk saturation. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation + summary: Disk IO queue is high. + expr: | + rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) > 10 + for: 30m + labels: + severity: warning + - alert: NodeSystemdServiceFailed + annotations: + description: Systemd service {{ $labels.name }} has entered failed state at {{ $labels.instance }} + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed + summary: Systemd service has entered failed state. + expr: | + node_systemd_unit_state{job="node-exporter", state="failed"} == 1 + for: 5m + labels: + severity: warning - name: node-exporter.rules rules: - expr: | diff --git a/manifests/setup/0alertmanagerConfigCustomResourceDefinition.yaml b/manifests/setup/0alertmanagerConfigCustomResourceDefinition.yaml index 48b5bf2e..afd9dd1b 100644 --- a/manifests/setup/0alertmanagerConfigCustomResourceDefinition.yaml +++ b/manifests/setup/0alertmanagerConfigCustomResourceDefinition.yaml @@ -175,6 +175,295 @@ spec: items: description: Receiver defines one or more notification integrations. properties: + discordConfigs: + description: List of Discord configurations. + items: + description: DiscordConfig configures notifications via Discord. See https://prometheus.io/docs/alerting/latest/configuration/#discord_config + properties: + apiURL: + description: The secret's key that contains the Discord webhook URL. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: Selects a key of a Secret in the namespace that contains the credentials for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: "Defines the authentication type. The value is case-insensitive. \n \"Basic\" is not a supported value. \n Default: \"Bearer\"" + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + message: + description: The template of the message's body. + type: string + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + title: + description: The template of the message's title. + type: string + required: + - apiURL + type: object + type: array emailConfigs: description: List of Email configurations. items: diff --git a/manifests/setup/0scrapeconfigCustomResourceDefinition.yaml b/manifests/setup/0scrapeconfigCustomResourceDefinition.yaml index 760e1467..fb07c93f 100644 --- a/manifests/setup/0scrapeconfigCustomResourceDefinition.yaml +++ b/manifests/setup/0scrapeconfigCustomResourceDefinition.yaml @@ -188,6 +188,104 @@ spec: description: RefreshInterval configures the refresh interval at which Prometheus will re-query the endpoint to update the target list. pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string + tlsConfig: + description: TLS configuration applying to the target HTTP endpoint. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object url: description: URL from which the targets are fetched. minLength: 1 @@ -197,6 +295,32 @@ spec: - url type: object type: array + kubernetesSDConfigs: + description: KubernetesSDConfigs defines a list of Kubernetes service discovery configurations. + items: + description: KubernetesSDConfig allows retrieving scrape targets from Kubernetes' REST API. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config + properties: + role: + description: Role of the Kubernetes entities that should be discovered. Currently the only supported role is "Node". + enum: + - Node + type: string + required: + - role + type: object + type: array + labelLimit: + description: Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + labelNameLengthLimit: + description: Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + labelValueLengthLimit: + description: Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer metricsPath: description: MetricsPath HTTP path to scrape for metrics. If empty, Prometheus uses the default value (e.g. /metrics). type: string @@ -257,6 +381,10 @@ spec: type: string type: object type: array + sampleLimit: + description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + format: int64 + type: integer scheme: description: Configures the protocol scheme used for requests. If empty, Prometheus uses HTTP by default. enum: @@ -282,6 +410,108 @@ spec: type: array type: object type: array + targetLimit: + description: TargetLimit defines a limit on the number of scraped targets that will be accepted. + format: int64 + type: integer + tlsConfig: + description: TLS configuration to use on every scrape request + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object type: object required: - spec -- GitLab