From d988ee64d3a850244035fbd81465b29e8c48cd68 Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Mon, 6 Nov 2023 12:54:33 +0100
Subject: [PATCH] feat(postgres): Add alert for WAL size

---
Review notes (this area, between the separator above and the first file
diff, is not part of the commit message and is skipped when applying):

* Purpose: registers prometheusrules.yaml in the postgres kustomization and
  adds a PrometheusRule "postgres-rules" in namespace "postgres-system" with
  a single alert, PostgresHighWALUsage.
* Alert condition: the maximum pg_wal_size_bytes per (namespace,
  cluster_name) must stay above 2^30 for 1h; severity is "critical".
  In PromQL "^" is exponentiation, so 2^30 = 1073741824 bytes = 1 GiB,
  which agrees with the "1GiB" wording in the annotations.
* The line structure and YAML indentation of this patch were reconstructed
  from a whitespace-collapsed copy. NOTE(review): confirm the indentation of
  the kustomization.yaml context lines against the target tree before
  applying; a mismatch makes the first hunk fail.
* The new file now ends with a newline (the original patch carried a
  "No newline at end of file" marker). NOTE(review): the abbreviated blob id
  on the second "index" line was presumably computed for the original
  content and is therefore stale; plain applies do not need it.

 infrastructure/postgres/kustomization.yaml   |  1 +
 infrastructure/postgres/prometheusrules.yaml | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 infrastructure/postgres/prometheusrules.yaml

diff --git a/infrastructure/postgres/kustomization.yaml b/infrastructure/postgres/kustomization.yaml
index a234bf501..6e94f812f 100644
--- a/infrastructure/postgres/kustomization.yaml
+++ b/infrastructure/postgres/kustomization.yaml
@@ -6,6 +6,7 @@ resources:
   - repository.yaml
   - release.yaml
   - podmonitor.yaml
+  - prometheusrules.yaml
 configMapGenerator:
   - name: postgres-system-grafana-dashboards
     files:
diff --git a/infrastructure/postgres/prometheusrules.yaml b/infrastructure/postgres/prometheusrules.yaml
new file mode 100644
index 000000000..0912f99d0
--- /dev/null
+++ b/infrastructure/postgres/prometheusrules.yaml
@@ -0,0 +1,19 @@
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: postgres-rules
+  namespace: postgres-system
+spec:
+  groups:
+    - name: postgres-clusters
+      rules:
+        - alert: PostgresHighWALUsage
+          annotations:
+            description: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour. This indicates a problem with either a replica or the WAL archiving process to S3.
+            summary: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour.
+          expr: max by (namespace, cluster_name)(pg_wal_size_bytes) > 2^30
+          for: 1h
+          labels:
+            issue: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour.
+            severity: critical
-- 
GitLab