Skip to content
Snippets Groups Projects
Commit 5ca8d2b9 authored by github-actions[bot]
Browse files

assets,site/content: daily assets regeneration

parent 3e4f5310
Branches
Tags
No related merge requests found
...@@ -169,12 +169,13 @@ groups: ...@@ -169,12 +169,13 @@ groups:
severity: warning severity: warning
- alert: KubeContainerWaiting - alert: KubeContainerWaiting
annotations: annotations:
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container description: 'pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on
{{ $labels.container}} has been in waiting state for longer than 1 hour. container {{ $labels.container}} has been in waiting state for longer than
1 hour. (reason: "{{ $labels.reason }}").'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
summary: Pod container waiting longer than 1 hour summary: Pod container waiting longer than 1 hour
expr: | expr: |
sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0 kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="kube-state-metrics"} > 0
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
...@@ -365,9 +366,9 @@ groups: ...@@ -365,9 +366,9 @@ groups:
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
summary: Processes experience elevated CPU throttling. summary: Processes experience elevated CPU throttling.
expr: | expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace) sum(increase(container_cpu_cfs_throttled_periods_total{container!="", job="cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
/ /
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace) sum(increase(container_cpu_cfs_periods_total{job="cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
> ( 25 / 100 ) > ( 25 / 100 )
for: 15m for: 15m
labels: labels:
...@@ -573,7 +574,9 @@ groups: ...@@ -573,7 +574,9 @@ groups:
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: | expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
and
on(job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
...@@ -584,7 +587,9 @@ groups: ...@@ -584,7 +587,9 @@ groups:
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: | expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
and
on(job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0
for: 5m for: 5m
labels: labels:
severity: critical severity: critical
......
...@@ -244,12 +244,13 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md ...@@ -244,12 +244,13 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: KubeContainerWaiting alert: KubeContainerWaiting
annotations: annotations:
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container description: 'pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
{{ $labels.container}} has been in waiting state for longer than 1 hour. {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason:
"{{ $labels.reason }}").'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
summary: Pod container waiting longer than 1 hour summary: Pod container waiting longer than 1 hour
expr: | expr: |
sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0 kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="kube-state-metrics"} > 0
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
...@@ -525,9 +526,9 @@ annotations: ...@@ -525,9 +526,9 @@ annotations:
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
summary: Processes experience elevated CPU throttling. summary: Processes experience elevated CPU throttling.
expr: | expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace) sum(increase(container_cpu_cfs_throttled_periods_total{container!="", job="cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
/ /
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace) sum(increase(container_cpu_cfs_periods_total{job="cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
> ( 25 / 100 ) > ( 25 / 100 )
for: 15m for: 15m
labels: labels:
...@@ -805,7 +806,9 @@ annotations: ...@@ -805,7 +806,9 @@ annotations:
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: | expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
and
on(job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
...@@ -822,7 +825,9 @@ annotations: ...@@ -822,7 +825,9 @@ annotations:
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: | expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
and
on(job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0
for: 5m for: 5m
labels: labels:
severity: critical severity: critical
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment