diff --git a/infrastructure/postgres/kustomization.yaml b/infrastructure/postgres/kustomization.yaml
index a234bf501090b2be193c1a671d899325f658a6dd..6e94f812fcfd8d1774c2320d42b527a61d4edb0a 100644
--- a/infrastructure/postgres/kustomization.yaml
+++ b/infrastructure/postgres/kustomization.yaml
@@ -6,6 +6,7 @@ resources:
   - repository.yaml
   - release.yaml
   - podmonitor.yaml
+  - prometheusrules.yaml
 configMapGenerator:
   - name: postgres-system-grafana-dashboards
     files:
diff --git a/infrastructure/postgres/prometheusrules.yaml b/infrastructure/postgres/prometheusrules.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0912f99d07ed92e482ff416d5407d453584be300
--- /dev/null
+++ b/infrastructure/postgres/prometheusrules.yaml
@@ -0,0 +1,19 @@
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: postgres-rules
+  namespace: postgres-system
+spec:
+  groups:
+    - name: postgres-clusters
+      rules:
+        - alert: PostgresHighWALUsage
+          annotations:
+            description: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour. This indicates a problem with either a replica or the WAL archiving process to S3.
+            summary: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour.
+          expr: max by (namespace, cluster_name)(pg_wal_size_bytes) > 2^30
+          for: 1h
+          labels:
+            issue: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour.
+            severity: critical
\ No newline at end of file