diff --git a/assets/prometheus/rules/kube-state-metrics.rules.yaml b/assets/prometheus/rules/kube-state-metrics.rules.yaml index 32b99fa20a194f91d0e9913ceec2039a284f4953..9325df0b5ca8ab6010e52d73e24f1c278c53ba3f 100644 --- a/assets/prometheus/rules/kube-state-metrics.rules.yaml +++ b/assets/prometheus/rules/kube-state-metrics.rules.yaml @@ -8,7 +8,8 @@ groups: severity: warning annotations: description: Observed deployment generation does not match expected one for - deployment {{$labels.namespaces}}{{$labels.deployment}} + deployment {{$labels.namespaces}}/{{$labels.deployment}} + summary: Deployment is outdated - alert: DeploymentReplicasNotUpdated expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas) or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas)) @@ -18,8 +19,9 @@ groups: severity: warning annotations: description: Replicas are not updated and available for deployment {{$labels.namespaces}}/{{$labels.deployment}} + summary: Deployment replicas are outdated - alert: DaemonSetRolloutStuck - expr: kube_daemonset_status_current_number_ready / kube_daemonset_status_desired_number_scheduled + expr: kube_daemonset_status_number_ready / kube_daemonset_status_desired_number_scheduled * 100 < 100 for: 15m labels: @@ -27,6 +29,7 @@ groups: annotations: description: Only {{$value}}% of desired pods scheduled and ready for daemon set {{$labels.namespaces}}/{{$labels.daemonset}} + summary: DaemonSet is missing pods - alert: K8SDaemonSetsNotScheduled expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled > 0 @@ -46,10 +49,11 @@ groups: to run. summary: Daemonsets are not scheduled correctly - alert: PodFrequentlyRestarting - expr: increase(kube_pod_container_status_restarts[1h]) > 5 + expr: increase(kube_pod_container_status_restarts_total[1h]) > 5 for: 10m labels: severity: warning annotations: description: Pod {{$labels.namespaces}}/{{$labels.pod}} is was restarted {{$value}} times within the last hour + summary: Pod is restarting frequently diff --git a/manifests/prometheus/prometheus-k8s-rules.yaml b/manifests/prometheus/prometheus-k8s-rules.yaml index 0a667e01cdced51e87abcabf0eed32e5bcafe57e..7011423b9367b2f84ae8745b5ea587c4fe76e77a 100644 --- a/manifests/prometheus/prometheus-k8s-rules.yaml +++ b/manifests/prometheus/prometheus-k8s-rules.yaml @@ -285,7 +285,8 @@ data: severity: warning annotations: description: Observed deployment generation does not match expected one for - deployment {{$labels.namespaces}}{{$labels.deployment}} + deployment {{$labels.namespaces}}/{{$labels.deployment}} + summary: Deployment is outdated - alert: DeploymentReplicasNotUpdated expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas) or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas)) @@ -295,8 +296,9 @@ data: severity: warning annotations: description: Replicas are not updated and available for deployment {{$labels.namespaces}}/{{$labels.deployment}} + summary: Deployment replicas are outdated - alert: DaemonSetRolloutStuck - expr: kube_daemonset_status_current_number_ready / kube_daemonset_status_desired_number_scheduled + expr: kube_daemonset_status_number_ready / kube_daemonset_status_desired_number_scheduled * 100 < 100 for: 15m labels: @@ -304,6 +306,7 @@ data: annotations: description: Only {{$value}}% of desired pods scheduled and ready for daemon set {{$labels.namespaces}}/{{$labels.daemonset}} + summary: DaemonSet is missing pods - alert: K8SDaemonSetsNotScheduled expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled > 0 @@ -323,13 +326,14 @@ data: to run. summary: Daemonsets are not scheduled correctly - alert: PodFrequentlyRestarting - expr: increase(kube_pod_container_status_restarts[1h]) > 5 + expr: increase(kube_pod_container_status_restarts_total[1h]) > 5 for: 10m labels: severity: warning annotations: description: Pod {{$labels.namespaces}}/{{$labels.pod}} is was restarted {{$value}} times within the last hour + summary: Pod is restarting frequently kubelet.rules.yaml: |+ groups: - name: kubelet.rules