From a3ce6ce1d1aca9bf8a48ea044d1e81aa2ead4ea4 Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Tue, 22 Nov 2022 19:26:44 +0100
Subject: [PATCH] feat(iot): Add alerts for shelly devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch introduces some useful alerts regarding humidity and room
temperature in order to keep all rooms managed. This will be especially
handy while away from home or on holiday. It reuses the regular
notification infrastructure that exists for the cluster anyway.

The humidity alerts trigger a warning when the humidity stays above 65%
for 6 hours, and a critical alert when it stays above 80% for 30 minutes.

It also triggers a warning when temperatures stay above 25°C for 6 hours,
or a critical alert when temperatures exceed 28°C for more than 30 minutes.

Finally, when temperatures drop too low, increasing the risk of mold, a
warning is triggered at temperatures below 20°C after 6 hours and a
critical alert is triggered when the temperature falls below 18°C for more
than 30 minutes.

References: https://www.restorationservicesil.com/what-best-temperature-prevent-mold/
---
 apps/k8s01/iot/kustomization.yaml |  1 +
 apps/k8s01/iot/monitoring.yaml    | 82 +++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 apps/k8s01/iot/monitoring.yaml

diff --git a/apps/k8s01/iot/kustomization.yaml b/apps/k8s01/iot/kustomization.yaml
index b66bcfa2a..ccee6fc2c 100644
--- a/apps/k8s01/iot/kustomization.yaml
+++ b/apps/k8s01/iot/kustomization.yaml
@@ -8,3 +8,4 @@ resources:
 - rainer.yaml
 - shelly.yaml
 - shelly-ht-monitor.yaml
+- monitoring.yaml
diff --git a/apps/k8s01/iot/monitoring.yaml b/apps/k8s01/iot/monitoring.yaml
new file mode 100644
index 000000000..c6a1aa859
--- /dev/null
+++ b/apps/k8s01/iot/monitoring.yaml
@@ -0,0 +1,82 @@
+---
+# Alerting rules for Shelly devices: plug power draw and shelly-ht room climate.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: shelly-rules
+  namespace: iot
+spec:
+  groups:
+  - name: shelly-plugs
+    rules:
+    # Sum of the 1m-average power over all series except target "shelly01"
+    # (presumably not cluster hardware - confirm), above 100 W for 6h.
+    - alert: K8sEnergyUsageHigh
+      annotations:
+        description: The Kubernetes Cluster has been using more than 100 Watts per minute for the past 6 hours.
+        summary: The Kubernetes Cluster has been using more than 100 Watts per minute for the past 6 hours.
+      expr: sum(avg_over_time(shelly_meter_power_watts{target!="shelly01"}[1m])) > 100
+      for: 6h
+      labels:
+        issue: Check your K8s cluster, it's drawing a lot of power!
+        severity: warning
+  - name: shelly-ht
+    rules:
+    # Humidity: warning above 65% for 6h, critical above 80% for 30m.
+    - alert: HumidityHighWarning
+      annotations:
+        description: The humidity of a room/sensor has exceeded 65% over the past 6 hours.
+        summary: The humidity of a room/sensor has exceeded 65% over the past 6 hours.
+      expr: avg_over_time(shellyht_humidity_percent[10m]) > 65
+      for: 6h
+      labels:
+        issue: Humidity in {{$labels.name}} is too high. Open a window!
+        severity: warning
+    - alert: HumidityHighCritical
+      annotations:
+        description: The humidity of a room/sensor has exceeded 80% over the past 30 minutes.
+        summary: The humidity of a room/sensor has exceeded 80% over the past 30 minutes.
+      expr: avg_over_time(shellyht_humidity_percent[1m]) > 80
+      for: 30m
+      labels:
+        issue: Humidity in {{$labels.name}} is too high. Open a window!
+        severity: critical
+    # High temperature: warning above 25°C for 6h, critical above 28°C for 30m.
+    - alert: TempratureHighWarning
+      annotations:
+        description: The temprature of a room/sensor has exceeded 25°C over the past 6 hours.
+        summary: The temprature of a room/sensor has exceeded 25°C over the past 6 hours.
+      expr: avg_over_time(shellyht_temperature_celsius[10m]) > 25
+      for: 6h
+      labels:
+        issue: Temprature in {{$labels.name}} is high. Open a window!
+        severity: warning
+    - alert: TempratureHighCritical
+      annotations:
+        description: The temprature of a room/sensor has exceeded 28°C over the past 30 minutes.
+        summary: The temprature of a room/sensor has exceeded 28°C over the past 30 minutes.
+      expr: avg_over_time(shellyht_temperature_celsius[1m]) > 28
+      for: 30m
+      labels:
+        issue: Temprature in {{$labels.name}} is too high. Open a window!
+        severity: critical
+    # Low temperature (mold risk): warning below 20°C for 6h, critical below
+    # 18°C for 30m.
+    - alert: TempratureLowWarning
+      annotations:
+        description: The temprature of a room/sensor has dropped below 20°C over the past 6 hours.
+        summary: The temprature of a room/sensor has dropped below 20°C over the past 6 hours.
+      expr: avg_over_time(shellyht_temperature_celsius[10m]) < 20
+      for: 6h
+      labels:
+        issue: Temprature in {{$labels.name}} is low. Turn up the heating!
+        severity: warning
+    - alert: TempratureLowCritical
+      annotations:
+        description: The temprature of a room/sensor has dropped below 18°C over the past 30 minutes.
+        summary: The temprature of a room/sensor has dropped below 18°C over the past 30 minutes.
+      expr: avg_over_time(shellyht_temperature_celsius[1m]) < 18
+      for: 30m
+      labels:
+        issue: Temprature in {{$labels.name}} is too low. Turn up the heating!
+        severity: critical
-- 
GitLab