From cd0f3c641e566eb676974c6a33caa2b958e97108 Mon Sep 17 00:00:00 2001 From: Simon Pasquier <spasquie@redhat.com> Date: Tue, 10 Dec 2019 16:44:59 +0100 Subject: [PATCH] regenerate Signed-off-by: Simon Pasquier <spasquie@redhat.com> --- manifests/grafana-dashboardDefinitions.yaml | 228 +++++++++++++------- manifests/prometheus-rules.yaml | 61 ++++-- 2 files changed, 198 insertions(+), 91 deletions(-) diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 5cfa21f5..26b21ab1 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -269,7 +269,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (verb, le))", + "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", verb!=\"WATCH\"}[5m])) by (verb, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}}", @@ -1350,7 +1350,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -1395,7 +1395,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -1473,7 +1473,7 @@ items: "value": "" } ], - "datasource": "prometheus", + "datasource": "$datasource", "fill": 1, "fontSize": "90%", "gridPos": { @@ -1776,7 +1776,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -1821,7 +1821,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -1893,7 +1893,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -1992,7 +1992,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2102,7 +2102,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2201,7 +2201,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2320,7 +2320,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2419,7 +2419,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -2541,7 +2541,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, @@ -2586,7 +2586,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -2611,6 +2611,22 @@ items: "tagsQuery": "", "type": "interval", "useTags": false + }, + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" } ] }, @@ -6279,7 +6295,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -8143,7 +8159,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -10703,7 +10719,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -12736,7 +12752,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -14823,7 +14839,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -17464,7 +17480,7 @@ items: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "prometheus", + "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { @@ -17591,7 +17607,7 @@ items: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "prometheus", + "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { @@ -17744,7 +17760,7 @@ items: "value": "" } ], - "datasource": "prometheus", + "datasource": "$datasource", "fill": 1, "fontSize": "100%", "gridPos": { @@ -18000,7 +18016,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -18099,7 +18115,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -18209,7 +18225,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -18308,7 +18324,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -18427,7 +18443,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -18526,7 +18542,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -18639,6 +18655,22 @@ items: ], "templating": { "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".+", "auto": false, @@ -18648,7 +18680,7 @@ items: "text": "kube-system", "value": "kube-system" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, "includeAll": true, @@ -18680,7 +18712,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, @@ -18725,7 +18757,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -18854,7 +18886,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -18899,7 +18931,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -18977,7 +19009,7 @@ items: "value": "" } ], - "datasource": "prometheus", + "datasource": "$datasource", "fill": 1, "fontSize": "90%", "gridPos": { @@ -19280,7 +19312,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -19325,7 +19357,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -19397,7 +19429,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19496,7 +19528,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19606,7 +19638,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19705,7 +19737,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19824,7 +19856,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -19923,7 +19955,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -20036,6 +20068,22 @@ items: ], "templating": { "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": null, "auto": false, @@ -20045,7 +20093,7 @@ items: "text": "kube-system", "value": "kube-system" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, "includeAll": false, @@ -20077,7 +20125,7 @@ items: "text": "deployment", "value": "deployment" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", "hide": 0, "includeAll": false, @@ -20109,7 +20157,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, @@ -20154,7 +20202,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -23733,7 +23781,7 @@ items: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "prometheus", + "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { @@ -23860,7 +23908,7 @@ items: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "prometheus", + "datasource": "$datasource", "decimals": 0, "format": "time_series", "gauge": { @@ -24006,7 +24054,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -24105,7 +24153,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -24215,7 +24263,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -24314,7 +24362,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -24433,7 +24481,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -24532,7 +24580,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 10, @@ -24645,6 +24693,22 @@ items: ], "templating": { "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".+", "auto": false, @@ -24654,7 +24718,7 @@ items: "text": "kube-system", "value": "kube-system" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, "includeAll": true, @@ -24686,7 +24750,7 @@ items: "text": "", "value": "" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", "hide": 0, "includeAll": false, @@ -24718,7 +24782,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, @@ -24763,7 +24827,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, @@ -24915,7 +24979,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by(container) (container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"})", + "expr": "sum by(container) (container_memory_working_set_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Current: {{ container }}", @@ -31293,7 +31357,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -31338,7 +31402,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -31394,7 +31458,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -31439,7 +31503,7 @@ items: "label": "Others", "threshold": 0 }, - "datasource": "prometheus", + "datasource": "$datasource", "fontSize": "80%", "format": "Bps", "gridPos": { @@ -31511,7 +31575,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -31610,7 +31674,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -31720,7 +31784,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -31819,7 +31883,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -31938,7 +32002,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -32037,7 +32101,7 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", "fill": 2, "gridPos": { "h": 9, @@ -32150,6 +32214,22 @@ items: ], "templating": { "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".+", "auto": false, @@ -32159,7 +32239,7 @@ items: "text": "kube-system", "value": "kube-system" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(container_network_receive_packets_total, namespace)", "hide": 0, "includeAll": true, @@ -32191,7 +32271,7 @@ items: "text": "", "value": "" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)", "hide": 0, "includeAll": false, @@ -32223,7 +32303,7 @@ items: "text": "deployment", "value": "deployment" }, - "datasource": "prometheus", + "datasource": "$datasource", "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", "hide": 0, "includeAll": false, @@ -32255,7 +32335,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": null, @@ -32300,7 +32380,7 @@ items: "text": "5m", "value": "5m" }, - "datasource": "prometheus", + "datasource": "$datasource", "hide": 2, "includeAll": false, "label": null, diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 93334a9e..ae67997e 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -68,17 +68,22 @@ spec: - name: kube-apiserver.rules rules: - expr: | - histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) + sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod) + / + sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod) + record: cluster:apiserver_request_duration_seconds:mean5m + - expr: | + histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod)) labels: quantile: "0.99" record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) + histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod)) labels: quantile: "0.9" record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - expr: | - histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod)) + histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod)) labels: quantile: "0.5" record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile @@ -112,17 +117,25 @@ spec: sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace) record: namespace:container_memory_usage_bytes:sum - expr: | - sum by (namespace, label_name) ( - sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)) by (namespace, pod) - * on (namespace, pod) - group_left(label_name) kube_pod_labels{job="kube-state-metrics"} + sum by (namespace) ( + sum by (namespace, pod) ( + max by (namespace, pod, container) ( + kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} + ) * on(namespace, pod) group_left() max by (namespace, pod) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) ) record: namespace:kube_pod_container_resource_requests_memory_bytes:sum - expr: | - sum by (namespace, label_name) ( - sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)) by (namespace, pod) - * on (namespace, pod) - group_left(label_name) kube_pod_labels{job="kube-state-metrics"} + sum by (namespace) ( + sum by (namespace, pod) ( + max by (namespace, pod, container) ( + kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} + ) * on(namespace, pod) group_left() max by (namespace, pod) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) ) record: namespace:kube_pod_container_resource_requests_cpu_cores:sum - expr: | @@ -425,7 +438,7 @@ spec: state for longer than 15 minutes. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready expr: | - sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Failed|Pending|Unknown"} * on(namespace, pod) group_left(owner_kind) kube_pod_owner{owner_kind!="Job"}) > 0 + sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})) > 0 for: 15m labels: severity: critical @@ -753,12 +766,26 @@ spec: rules: - alert: KubeAPILatencyHigh annotations: - message: The API server has a 99th percentile latency of {{ $value }} seconds - for {{ $labels.verb }} {{ $labels.resource }}. + message: The API server has an abnormal latency of {{ $value }} seconds for + {{ $labels.verb }} {{ $labels.resource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh expr: | - cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"} > 1 - for: 10m + ( + cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} + > + on (verb) group_left() + ( + avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) + + + 2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) + ) + ) > on (verb) group_left() + 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) + and on (verb,resource) + cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} + > + 1 + for: 5m labels: severity: warning - alert: KubeAPILatencyHigh @@ -767,7 +794,7 @@ spec: for {{ $labels.verb }} {{ $labels.resource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh expr: | - cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"} > 4 + cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} > 4 for: 10m labels: severity: critical -- GitLab