Skip to content
Snippets Groups Projects
kubernetesControlPlane-prometheusRule.yaml 69.82 KiB
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/name: kube-prometheus
    app.kubernetes.io/part-of: kube-prometheus
    prometheus: k8s
    role: alert-rules
  name: kubernetes-monitoring-rules
  namespace: monitoring
spec:
  groups:
  - name: kubernetes-apps
    rules:
    - alert: KubePodCrashLooping
      annotations:
        description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
          }}) is in waiting state (reason: "CrashLoopBackOff").'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
        summary: Pod is crash looping.
      expr: |
        max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics"}[5m]) >= 1
      for: 15m
      labels:
        severity: warning
    - alert: KubePodNotReady
      annotations:
        description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
          state for longer than 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
        summary: Pod has been in a non-ready state for more than 15 minutes.
      expr: |
        sum by (namespace, pod, cluster) (
          max by(namespace, pod, cluster) (
            kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}
          ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (
            1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"})
          )
        ) > 0
      for: 15m
      labels:
        severity: warning
    - alert: KubeDeploymentGenerationMismatch
      annotations:
        description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
          }} does not match, this indicates that the Deployment has failed but has
          not been rolled back.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
        summary: Deployment generation mismatch due to possible roll-back
      expr: |
        kube_deployment_status_observed_generation{job="kube-state-metrics"}
          !=
        kube_deployment_metadata_generation{job="kube-state-metrics"}
      for: 15m
      labels:
        severity: warning
    - alert: KubeDeploymentReplicasMismatch
      annotations:
        description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
          not matched the expected number of replicas for longer than 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
        summary: Deployment has not matched the expected number of replicas.
      expr: |
        (
          kube_deployment_spec_replicas{job="kube-state-metrics"}
            >
          kube_deployment_status_replicas_available{job="kube-state-metrics"}
        ) and (
          changes(kube_deployment_status_replicas_updated{job="kube-state-metrics"}[10m])
            ==