From a840536ec668c5199c2f3b78689a318de1336fbc Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Mon, 6 Nov 2023 12:15:57 +0100
Subject: [PATCH] feat(nas): Add SLO for all S3 related routes

---
 apps/k8s01/nas/kustomization.yaml |  1 +
 apps/k8s01/nas/slo.yaml           | 52 +++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 apps/k8s01/nas/slo.yaml

diff --git a/apps/k8s01/nas/kustomization.yaml b/apps/k8s01/nas/kustomization.yaml
index 2e5a209f9..e29e22d9d 100644
--- a/apps/k8s01/nas/kustomization.yaml
+++ b/apps/k8s01/nas/kustomization.yaml
@@ -6,3 +6,4 @@ resources:
 - s3.yaml
 - ../../../shared/applications/oauth2-proxy.yaml
 - oauth2.yaml
+- slo.yaml
diff --git a/apps/k8s01/nas/slo.yaml b/apps/k8s01/nas/slo.yaml
new file mode 100644
index 000000000..3d3a14051
--- /dev/null
+++ b/apps/k8s01/nas/slo.yaml
@@ -0,0 +1,52 @@
+# Sloth SLOs for all S3-related routes, measured from NGINX ingress
+# controller metrics of the ingresses matching "s3-.*".
+apiVersion: sloth.slok.dev/v1
+kind: PrometheusServiceLevel
+metadata:
+  name: requests-s3
+  namespace: nas
+spec:
+  service: "s3"
+  slos:
+    # Availability: responses with status 5xx or 429 count as errors.
+    - name: "requests-availability"
+      objective: 98
+      description: "S3: SLO based on availability for HTTP request responses."
+      sli:
+        events:
+          # NOTE(review): queries select exported_namespace="s3" while this
+          # resource lives in namespace "nas" - confirm this is intended.
+          # "OR vector(0)" yields 0 errors when no matching series exist.
+          errorQuery: sum(rate(nginx_ingress_controller_requests{exported_namespace="s3",ingress=~"s3-.*",status=~"(5..|429)"}[{{.window}}])) OR vector(0)
+          # "> 0 OR vector(1)" falls back to 1 when there is no traffic, so
+          # the total is never zero or absent.
+          totalQuery: sum(rate(nginx_ingress_controller_requests{exported_namespace="s3",ingress=~"s3-.*"}[{{.window}}])) > 0 OR vector(1)
+      alerting:
+        name: S3HighErrorRate
+        labels:
+          category: "availability"
+        annotations:
+          summary: "High error rate on 's3' requests responses"
+    # Latency: requests outside the le="0.25" (250ms) bucket count as errors.
+    - name: "requests-latency"
+      objective: 95
+      description: "S3: SLO based on latency for HTTP request responses. Warns if requests take longer than 250ms. When responses are slower than 200ms they become noticeably slow."
+      labels:
+        category: latency
+      sli:
+        events:
+          # Slow requests = all requests minus those in the 250ms bucket. Both
+          # selectors filter on "method" so they cover the same request set.
+          errorQuery: |
+            (
+              sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace="s3",ingress=~"s3-.*",method!="WATCH"}[{{.window}}]))
+              -
+              sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{exported_namespace="s3",ingress=~"s3-.*",le="0.25",method!="WATCH"}[{{.window}}]))
+            )
+          totalQuery: sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace="s3",ingress=~"s3-.*",method!="WATCH"}[{{.window}}])) > 0 OR vector(1)
+      alerting:
+        name: S3LatencyAlert
+        labels:
+          category: "latency"
+        annotations:
+          summary: "Slow responses on 's3-proxy' requests responses. More than 5% take more than 250ms."
-- 
GitLab