From d988ee64d3a850244035fbd81465b29e8c48cd68 Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Mon, 6 Nov 2023 12:54:33 +0100
Subject: [PATCH] feat(postgres): Add alert for WAL size

---
 infrastructure/postgres/kustomization.yaml   |  1 +
 infrastructure/postgres/prometheusrules.yaml | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 infrastructure/postgres/prometheusrules.yaml

diff --git a/infrastructure/postgres/kustomization.yaml b/infrastructure/postgres/kustomization.yaml
index a234bf501..6e94f812f 100644
--- a/infrastructure/postgres/kustomization.yaml
+++ b/infrastructure/postgres/kustomization.yaml
@@ -6,6 +6,7 @@ resources:
   - repository.yaml
   - release.yaml
   - podmonitor.yaml
+  - prometheusrules.yaml
 configMapGenerator:
   - name: postgres-system-grafana-dashboards
     files:
diff --git a/infrastructure/postgres/prometheusrules.yaml b/infrastructure/postgres/prometheusrules.yaml
new file mode 100644
index 000000000..0912f99d0
--- /dev/null
+++ b/infrastructure/postgres/prometheusrules.yaml
@@ -0,0 +1,19 @@
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: postgres-rules
+  namespace: postgres-system
+spec:
+  groups:
+  - name: postgres-clusters
+    rules:
+    - alert: PostgresHighWALUsage
+      annotations:
+        description: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour. This indicates a problem with either a replica or the WAL archiving process to S3.
+        summary: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour.
+      expr: max by (namespace, cluster_name)(pg_wal_size_bytes) > 2^30  # 2^30 bytes = 1 GiB
+      for: 1h  # the expression must stay true this long before the alert fires
+      labels:
+        issue: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour.
+        severity: critical
-- 
GitLab