From a840536ec668c5199c2f3b78689a318de1336fbc Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Mon, 6 Nov 2023 12:15:57 +0100
Subject: [PATCH] feat(nas): Add SLO for all S3 related routes

---
 apps/k8s01/nas/kustomization.yaml |  1 +
 apps/k8s01/nas/slo.yaml           | 41 +++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 apps/k8s01/nas/slo.yaml

diff --git a/apps/k8s01/nas/kustomization.yaml b/apps/k8s01/nas/kustomization.yaml
index 2e5a209f9..e29e22d9d 100644
--- a/apps/k8s01/nas/kustomization.yaml
+++ b/apps/k8s01/nas/kustomization.yaml
@@ -6,3 +6,4 @@ resources:
 - s3.yaml
 - ../../../shared/applications/oauth2-proxy.yaml
 - oauth2.yaml
+- slo.yaml  # availability and latency SLOs for the S3 ingress routes
diff --git a/apps/k8s01/nas/slo.yaml b/apps/k8s01/nas/slo.yaml
new file mode 100644
index 000000000..3d3a14051
--- /dev/null
+++ b/apps/k8s01/nas/slo.yaml
@@ -0,0 +1,41 @@
+apiVersion: sloth.slok.dev/v1  # Sloth SLO definitions for every ingress matching s3-.*
+kind: PrometheusServiceLevel
+metadata:
+  name: requests-s3
+  namespace: nas
+spec:
+  service: "s3"
+  slos:
+    - name: "requests-availability"
+      objective: 98  # target percentage of requests that must not fail
+      description: "S3: SLO based on availability for HTTP request responses."
+      sli:
+        events:  # 5xx and 429 count as errors; the vector() fallbacks keep the ratio defined without traffic
+          errorQuery: sum(rate(nginx_ingress_controller_requests{exported_namespace="s3",ingress=~"s3-.*",status=~"(5..|429)"}[{{.window}}])) OR vector(0)
+          totalQuery: sum(rate(nginx_ingress_controller_requests{exported_namespace="s3",ingress=~"s3-.*"}[{{.window}}])) > 0 OR vector(1)
+      alerting:
+        name: S3HighErrorRate
+        labels:
+          category: "availability"
+        annotations:
+          summary: "High error rate on 's3' requests responses"
+    - name: "requests-latency"
+      objective: 95  # target percentage of requests answered within 250ms
+      description: "S3: SLO based on latency for HTTP request responses. Warns if requests take longer than 250ms. When responses are slower than 200ms they become noticeably slow."
+      labels:
+        category: "latency"
+      sli:
+        events:  # errors = all requests minus those counted in the le="0.25" histogram bucket
+          errorQuery: |
+            (
+              sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace="s3",ingress=~"s3-.*",method!="WATCH"}[{{.window}}]))
+              -
+              sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{exported_namespace="s3",ingress=~"s3-.*",le="0.25",method!="WATCH"}[{{.window}}]))
+            ) OR vector(0)
+          totalQuery: sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace="s3",ingress=~"s3-.*",method!="WATCH"}[{{.window}}])) > 0 OR vector(1)
+      alerting:
+        name: S3LatencyAlert
+        labels:
+          category: "latency"
+        annotations:
+          summary: "Slow responses on 's3' requests responses. More than 5% take more than 250ms."
-- 
GitLab