From 4ab5bf36e4f878791ae8dd8c7689d0ea3d56f07c Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Mon, 30 Oct 2023 03:01:37 +0100
Subject: [PATCH] feat(immich): Add SLO for Immich

---
 apps/k8s01/immich/kustomization.yaml |  1 +
 apps/k8s01/immich/slo.yaml           | 41 ++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 apps/k8s01/immich/slo.yaml

diff --git a/apps/k8s01/immich/kustomization.yaml b/apps/k8s01/immich/kustomization.yaml
index 0ce62fbdf..9b88b77a5 100644
--- a/apps/k8s01/immich/kustomization.yaml
+++ b/apps/k8s01/immich/kustomization.yaml
@@ -6,6 +6,7 @@ resources:
   - immich-values.yaml
   - certificate.yaml
   - limitrange.yaml
+  - slo.yaml
   - ../../../shared/resourcequotas/default.yaml
 patchesStrategicMerge:
 - database-override.yaml
diff --git a/apps/k8s01/immich/slo.yaml b/apps/k8s01/immich/slo.yaml
new file mode 100644
index 000000000..8e10c7dcf
--- /dev/null
+++ b/apps/k8s01/immich/slo.yaml
@@ -0,0 +1,41 @@
+apiVersion: sloth.slok.dev/v1  # Sloth custom resource API group/version
+kind: PrometheusServiceLevel  # declares the SLOs of a single service
+metadata:
+  name: requests-immich
+  namespace: immich  # same namespace the SLO queries select via exported_namespace
+spec:
+  service: immich  # service name shared by every SLO listed below
+  slos:  # one list entry per SLO (availability and latency)
+    - name: requests-availability  # availability SLO measured on ingress HTTP responses
+      objective: 98  # target percentage of requests that must not count as errors
+      description: 'Immich: SLO based on availability for HTTP request responses.'
+      sli:
+        events:  # event-based SLI: errorQuery rate compared against totalQuery rate
+          errorQuery: 'sum(rate(nginx_ingress_controller_requests{exported_namespace="immich",ingress="immich",status=~"(5..|429)"}[{{.window}}]))'  # 5xx and 429 responses are the error events
+          totalQuery: 'sum(rate(nginx_ingress_controller_requests{exported_namespace="immich",ingress="immich"}[{{.window}}])) > 0 OR vector(1)'  # yields 1 when there is no traffic, so the total is never zero or absent
+      alerting:
+        name: ImmichHighErrorRate  # alert name used for this SLO
+        labels:
+          category: availability
+        annotations:
+          summary: "High error rate on 'immich' requests responses"
+    - name: "requests-latency"  # latency SLO measured on the ingress request-duration histogram
+      objective: 95  # target percentage of requests answered within 250ms (5% error budget)
+      description: "Immich: SLO based on latency for HTTP request responses. Warns if requests take longer than 250ms. When responses are slower than 200ms they become noticeably slow."
+      labels:
+        category: latency
+      sli:
+        events:  # error events = all requests minus those inside the le="0.25" bucket, i.e. requests slower than 250ms
+          errorQuery: |
+            (
+              sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace="immich",ingress="immich",method!="WATCH"}[{{.window}}]))
+              -
+              sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{exported_namespace="immich",ingress="immich",le="0.25",method!="WATCH"}[{{.window}}]))
+            )
+          totalQuery: sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace="immich",ingress="immich",method!="WATCH"}[{{.window}}])) > 0 OR vector(1)  # yields 1 when there is no traffic, so the total is never zero or absent
+      alerting:
+        name: ImmichLatencyAlert  # alert name used for this SLO
+        labels:
+          category: "latency"
+        annotations:
+          summary: "Slow responses on 'immich' requests responses. More than 5% take more than 250ms."
-- 
GitLab