From 4ab5bf36e4f878791ae8dd8c7689d0ea3d56f07c Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Mon, 30 Oct 2023 03:01:37 +0100
Subject: [PATCH] feat(immich): Add SLO for Immich

---
 apps/k8s01/immich/kustomization.yaml |  1 +
 apps/k8s01/immich/slo.yaml           | 51 ++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 apps/k8s01/immich/slo.yaml

diff --git a/apps/k8s01/immich/kustomization.yaml b/apps/k8s01/immich/kustomization.yaml
index 0ce62fbdf..9b88b77a5 100644
--- a/apps/k8s01/immich/kustomization.yaml
+++ b/apps/k8s01/immich/kustomization.yaml
@@ -6,6 +6,7 @@ resources:
   - immich-values.yaml
   - certificate.yaml
   - limitrange.yaml
+  - slo.yaml
   - ../../../shared/resourcequotas/default.yaml
 patchesStrategicMerge:
   - database-override.yaml
diff --git a/apps/k8s01/immich/slo.yaml b/apps/k8s01/immich/slo.yaml
new file mode 100644
index 000000000..8e10c7dcf
--- /dev/null
+++ b/apps/k8s01/immich/slo.yaml
@@ -0,0 +1,51 @@
+# Sloth SLO definitions for Immich, built on the
+# nginx_ingress_controller_* metrics of the "immich" Ingress.
+apiVersion: sloth.slok.dev/v1
+kind: PrometheusServiceLevel
+metadata:
+  name: requests-immich
+  namespace: immich
+spec:
+  service: "immich"
+  slos:
+    # Availability: a request counts as an error when its response status
+    # is 5xx or 429.
+    - name: "requests-availability"
+      objective: 98
+      description: "Immich: SLO based on availability for HTTP request responses."
+      sli:
+        events:
+          errorQuery: sum(rate(nginx_ingress_controller_requests{exported_namespace="immich",ingress="immich",status=~"(5..|429)"}[{{.window}}]))
+          # "> 0 OR vector(1)" substitutes 1 when the summed rate is zero or
+          # has no samples, so the total is never zero or empty.
+          totalQuery: sum(rate(nginx_ingress_controller_requests{exported_namespace="immich",ingress="immich"}[{{.window}}])) > 0 OR vector(1)
+      alerting:
+        name: ImmichHighErrorRate
+        labels:
+          category: "availability"
+        annotations:
+          summary: "High error rate on 'immich' requests responses"
+    # Latency: a request counts as an error when it falls outside the
+    # le="0.25" histogram bucket, i.e. it took longer than 250ms.
+    - name: "requests-latency"
+      objective: 95
+      description: "Immich: SLO based on latency for HTTP request responses. Warns if requests take longer than 250ms. When responses are slower than 200ms they become noticeably slow."
+      labels:
+        category: latency
+      sli:
+        events:
+          # All requests minus those answered within 250ms. Both selectors
+          # filter on the same "method" label so they cover the same requests.
+          errorQuery: |
+            (
+              sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace="immich",ingress="immich",method!="WATCH"}[{{.window}}]))
+              -
+              sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{exported_namespace="immich",ingress="immich",le="0.25",method!="WATCH"}[{{.window}}]))
+            )
+          totalQuery: sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace="immich",ingress="immich",method!="WATCH"}[{{.window}}])) > 0 OR vector(1)
+      alerting:
+        name: ImmichLatencyAlert
+        labels:
+          category: "latency"
+        annotations:
+          summary: "Slow responses on 'immich' requests responses. More than 5% take more than 250ms."
-- 
GitLab