From 2fbbdb935a9e0fa900b60df2fbcba71af3c4bcd5 Mon Sep 17 00:00:00 2001 From: Sheogorath <sheogorath@shivering-isles.com> Date: Mon, 24 Jun 2024 12:56:10 +0200 Subject: [PATCH] fix: Fix runbook URLs --- infrastructure/base/cert-manager/alerts.yaml | 8 ++++---- infrastructure/base/longhorn/monitoring.yaml | 4 ++-- infrastructure/base/metallb/release.yaml | 14 +++++++------- infrastructure/base/postgres/prometheusrules.yaml | 4 ++-- .../base/system-upgrades/monitoring.yaml | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/infrastructure/base/cert-manager/alerts.yaml b/infrastructure/base/cert-manager/alerts.yaml index 946faddf7..9d89efdd1 100644 --- a/infrastructure/base/cert-manager/alerts.yaml +++ b/infrastructure/base/cert-manager/alerts.yaml @@ -12,7 +12,7 @@ spec: annotations: description: New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back. - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/cert-manager/certmanagerabsent + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/cert-manager/certmanagerabsent/ summary: Cert Manager has dissapeared from Prometheus service discovery. expr: absent(up{job="cert-manager"}) for: 10m @@ -25,7 +25,7 @@ spec: description: The domain that this cert covers will be unavailable after {{ $value | humanizeDuration }}. Clients using endpoints that this cert protects will start to fail in {{ $value | humanizeDuration }}. - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/cert-manager/certmanagercertexpirysoon + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/cert-manager/certmanagercertexpirysoon/ summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry, it should have renewed over a week ago. expr: | @@ -40,7 +40,7 @@ spec: description: This certificate has not been ready to serve traffic for at least 10m. If the cert is being renewed or there is another valid cert, the ingress controller _may_ be able to serve that instead. - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/cert-manager/certmanagercertnotready + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/cert-manager/certmanagercertnotready/ summary: The cert `{{ $labels.name }}` is not ready to serve traffic. expr: | max by (name, exported_namespace, namespace, condition) ( @@ -53,7 +53,7 @@ spec: annotations: description: Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week. - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/cert-manager/certmanagerhittingratelimits + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/cert-manager/certmanagerhittingratelimits/ summary: Cert manager hitting LetsEncrypt rate limits. expr: | sum by (host) ( diff --git a/infrastructure/base/longhorn/monitoring.yaml b/infrastructure/base/longhorn/monitoring.yaml index 0bea7829b..fe7f33889 100644 --- a/infrastructure/base/longhorn/monitoring.yaml +++ b/infrastructure/base/longhorn/monitoring.yaml @@ -34,7 +34,7 @@ spec: labels: issue: The actual used space of Longhorn volume {{$labels.volume}} on {{$labels.node}} is high. severity: info - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/longhorn/longhornvolumeactualspaceusedinfo + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/longhorn/longhornvolumeactualspaceusedinfo/ - alert: LonghornVolumeStatusCritical annotations: description: Longhorn volume {{$labels.volume}} on {{$labels.node}} is Fault for @@ -113,4 +113,4 @@ spec: labels: issue: Longhorn share manager count is off by {{$value}} for 5m. severity: critical - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/longhorn/longhornsharemanageroff + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/longhorn/longhornsharemanageroff/ diff --git a/infrastructure/base/metallb/release.yaml b/infrastructure/base/metallb/release.yaml index 417fbcfab..b8f5f7682 100644 --- a/infrastructure/base/metallb/release.yaml +++ b/infrastructure/base/metallb/release.yaml @@ -46,32 +46,32 @@ data: addressPoolExhausted: labels: severity: critical - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbaddresspoolexhausted + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbaddresspoolexhausted/ addressPoolUsage: thresholds: - percent: 75 labels: severity: info - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbaddresspoolusage + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbaddresspoolusage/ - percent: 85 labels: severity: warning - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbaddresspoolusage + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbaddresspoolusage/ - percent: 95 labels: severity: critical - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbaddresspoolusage + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbaddresspoolusage/ bgpSessionDown: labels: severity: critical - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbbgpsessiondown + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbbgpsessiondown/ configNotLoaded: labels: severity: warning - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbconfignotloaded + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbconfignotloaded/ staleConfig: labels: severity: warning - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbstaleconfig + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/metallb/metallbstaleconfig/ serviceAccount: kube-prometheus-stack-prometheus namespace: monitoring-system diff --git a/infrastructure/base/postgres/prometheusrules.yaml b/infrastructure/base/postgres/prometheusrules.yaml index 880de665b..040ffdbdb 100644 --- a/infrastructure/base/postgres/prometheusrules.yaml +++ b/infrastructure/base/postgres/prometheusrules.yaml @@ -17,7 +17,7 @@ spec: labels: issue: The WAL size of the postgres cluster exceeded 1GiB for more than 1 hour. severity: critical - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/postgres/postgreshighwalusage + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/postgres/postgreshighwalusage/ - alert: PostgresNotRunning annotations: description: PostgreSQL instance is not running inside the container @@ -27,4 +27,4 @@ spec: labels: issue: PostgreSQL instance is not running inside the container severity: critical - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/postgres/postgresnotrunning \ No newline at end of file + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/postgres/postgresnotrunning/ \ No newline at end of file diff --git a/infrastructure/base/system-upgrades/monitoring.yaml b/infrastructure/base/system-upgrades/monitoring.yaml index 008e423a6..f027f2b3a 100644 --- a/infrastructure/base/system-upgrades/monitoring.yaml +++ b/infrastructure/base/system-upgrades/monitoring.yaml @@ -16,5 +16,5 @@ spec: labels: issue: The node {{$labels.node}} has been marked as unscheduable for more than 24h. severity: critical - runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/system-upgrades/kubenodeunschedulablecritical + runbook_url: https://runbooks.s3.shivering-isles.com/runbooks/system-upgrades/kubenodeunschedulablecritical/ -- GitLab