From 65364967160603a38c945b1a05540a99e2b406a2 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk <fbranczyk@gmail.com> Date: Wed, 5 Sep 2018 22:42:28 +0200 Subject: [PATCH] kube-prometheus: Improve disk alert Only trigger disk running full alerts when disk usage is over 85% of capacity, and use the recording rules of the kubernetes-mixin to only consider real physical filesystems. --- jsonnet/kube-prometheus/alerts/node.libsonnet | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/jsonnet/kube-prometheus/alerts/node.libsonnet b/jsonnet/kube-prometheus/alerts/node.libsonnet index 5c24f09f..27039f4e 100644 --- a/jsonnet/kube-prometheus/alerts/node.libsonnet +++ b/jsonnet/kube-prometheus/alerts/node.libsonnet @@ -7,11 +7,10 @@ { alert: 'NodeDiskRunningFull', annotations: { - description: 'device {{$labels.device}} on node {{$labels.instance}} is running full within the next 24 hours (mounted at {{$labels.mountpoint}})', - summary: 'Node disk is running full within 24 hours', + message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} is running full within the next 24 hours.', }, expr: ||| - predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0 and on(instance) up{%(nodeExporterSelector)s} + (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0) ||| % $._config, 'for': '30m', labels: { @@ -21,11 +20,10 @@ { alert: 'NodeDiskRunningFull', annotations: { - description: 'device {{$labels.device}} on node {{$labels.instance}} is running full within the next 2 hours (mounted at {{$labels.mountpoint}})', - summary: 'Node disk is running full within 2 hours', + message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} is running full within the next 2 hours.', }, expr: ||| - 
predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0 and on(instance) up{%(nodeExporterSelector)s} + (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0) ||| % $._config, 'for': '10m', labels: { -- GitLab