From a3ce6ce1d1aca9bf8a48ea044d1e81aa2ead4ea4 Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Tue, 22 Nov 2022 19:26:44 +0100
Subject: [PATCH] feat(iot): Add alerts for shelly devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch introduces some useful alerts regarding humidity and
room temperature in order to keep all rooms managed. This will be
especially handy while being outside, or on holiday. It reuses
the regular notification infrastructure that exists for the
cluster anyway.

The alerts for humidity trigger a warning when the humidity
stays above 65% for 6 hours, and a critical alert when the
humidity stays above 80% for 30 minutes.

It also triggers a warning when temperatures climb above 25°C
for 6 hours or a critical alert when temperatures exceed 28°C
for more than 30 minutes.

Finally, when temperatures drop too low, increasing the risk of
mold, a warning is triggered at temperatures below 20°C after 6
hours and a critical alert is triggered when the temperature
falls below 18°C for more than 30 minutes.

References:
https://www.restorationservicesil.com/what-best-temperature-prevent-mold/
---
 apps/k8s01/iot/kustomization.yaml |  1 +
 apps/k8s01/iot/monitoring.yaml    | 75 +++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 apps/k8s01/iot/monitoring.yaml

diff --git a/apps/k8s01/iot/kustomization.yaml b/apps/k8s01/iot/kustomization.yaml
index b66bcfa2a..ccee6fc2c 100644
--- a/apps/k8s01/iot/kustomization.yaml
+++ b/apps/k8s01/iot/kustomization.yaml
@@ -8,3 +8,4 @@ resources:
 - rainer.yaml
 - shelly.yaml
 - shelly-ht-monitor.yaml
+- monitoring.yaml
diff --git a/apps/k8s01/iot/monitoring.yaml b/apps/k8s01/iot/monitoring.yaml
new file mode 100644
index 000000000..c6a1aa859
--- /dev/null
+++ b/apps/k8s01/iot/monitoring.yaml
@@ -0,0 +1,75 @@
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule  # alerting rules for the Shelly devices in the iot namespace
+metadata:
+  name: shelly-rules
+  namespace: iot
+spec:
+  groups:
+  - name: shelly-plugs  # alerts based on the shelly_meter_power_watts metric
+    rules:
+    - alert: K8sEnergyUsageHigh
+      annotations:
+        description: The Kubernetes Cluster has been using more than 100 Watts (1-minute average) for the past 6 hours.
+        summary: The Kubernetes Cluster has been using more than 100 Watts (1-minute average) for the past 6 hours.
+      expr: sum(avg_over_time(shelly_meter_power_watts{target!="shelly01"}[1m])) > 100  # sum of the 1m averages of every target except shelly01
+      for: 6h  # the expression must stay true for 6h before the alert fires
+      labels:
+        issue: Check your K8s cluster, it's drawing a lot of power!
+        severity: warning
+  - name: shelly-ht  # alerts based on the shellyht_* humidity/temperature metrics
+    rules:
+    - alert: HumidityHighWarning
+      expr: avg_over_time(shellyht_humidity_percent[10m]) > 65  # 10-minute average above 65
+      for: 6h  # the expression must stay true for 6h before the alert fires
+      labels:
+        severity: warning
+        issue: Humidity in {{$labels.name}} is too high. Open a window!
+      annotations:
+        summary: The humidity of a room/sensor has exceeded 65% over the past 6 hours.
+        description: The humidity of a room/sensor has exceeded 65% over the past 6 hours.
+    - alert: HumidityHighCritical
+      expr: avg_over_time(shellyht_humidity_percent[1m]) > 80  # 1-minute average above 80
+      for: 30m  # the expression must stay true for 30m before the alert fires
+      labels:
+        severity: critical
+        issue: Humidity in {{$labels.name}} is too high. Open a window!
+      annotations:
+        summary: The humidity of a room/sensor has exceeded 80% over the past 30 minutes.
+        description: The humidity of a room/sensor has exceeded 80% over the past 30 minutes.
+    - alert: TempratureHighWarning
+      expr: avg_over_time(shellyht_temperature_celsius[10m]) > 25  # 10-minute average above 25
+      for: 6h  # the expression must stay true for 6h before the alert fires
+      labels:
+        severity: warning
+        issue: Temprature in {{$labels.name}} is high. Open a window!
+      annotations:
+        summary: The temprature of a room/sensor has exceeded 25°C over the past 6 hours.
+        description: The temprature of a room/sensor has exceeded 25°C over the past 6 hours.
+    - alert: TempratureHighCritical
+      expr: avg_over_time(shellyht_temperature_celsius[1m]) > 28  # 1-minute average above 28
+      for: 30m  # the expression must stay true for 30m before the alert fires
+      labels:
+        severity: critical
+        issue: Temprature in {{$labels.name}} is too high. Open a window!
+      annotations:
+        summary: The temprature of a room/sensor has exceeded 28°C over the past 30 minutes.
+        description: The temprature of a room/sensor has exceeded 28°C over the past 30 minutes.
+    - alert: TempratureLowWarning
+      annotations:
+        description: The temprature of a room/sensor has dropped below 20°C over the past 6 hours.
+        summary: The temprature of a room/sensor has dropped below 20°C over the past 6 hours.
+      expr: avg_over_time(shellyht_temperature_celsius[10m]) < 20  # low-temperature alert: 10-minute average below 20
+      for: 6h  # the expression must stay true for 6h before the alert fires
+      labels:
+        issue: Temprature in {{$labels.name}} is low. Turn up the heating!
+        severity: warning
+    - alert: TempratureLowCritical  # low-temperature critical alert; name must differ from the high-temperature one
+      annotations:
+        description: The temprature of a room/sensor has dropped below 18°C over the past 30 minutes.
+        summary: The temprature of a room/sensor has dropped below 18°C over the past 30 minutes.
+      expr: avg_over_time(shellyht_temperature_celsius[1m]) < 18  # 1-minute average below 18
+      for: 30m  # the expression must stay true for 30m before the alert fires
+      labels:
+        issue: Temprature in {{$labels.name}} is too low. Turn up the heating!
+        severity: critical
\ No newline at end of file
-- 
GitLab