# Patch: align the node_cpu recording rules in the standalone rules asset and
# in the generated prometheus-k8s-rules manifest (the same edits in both files).
#
# Changes carried by this patch:
#   * instance:node_cpu:rate:sum  - drops the mode!~"^(?:guest.*)$" matcher.
#   * instance:node_cpu:ratio     - additionally excludes mode="iowait".
#   * cluster:node_cpu:sum_rate5m - additionally excludes mode="iowait".
#   * cluster:node_cpu:ratio      - now divides cluster:node_cpu:sum_rate5m.
#     The previous expression referenced cluster:node_cpu:rate5m, which no rule
#     visible in these hunks records; the rule directly above records
#     cluster:node_cpu:sum_rate5m.
#
# NOTE(review): indentation inside the hunks was reconstructed from a
# whitespace-collapsed copy of this patch - confirm it against the target
# files before applying.
# NOTE(review): the "index" lines are kept from the original patch; their
# post-image hashes predate the cluster:node_cpu:ratio fix.
# TODO confirm: cluster:node_cpu:rate5m is not recorded by another rules file.
diff --git a/assets/prometheus/rules/node.rules.yaml b/assets/prometheus/rules/node.rules.yaml
index 9a9d599c638ee5e9b07dc22babc92c020d872e80..e678ca8410f1f21bf5ac78aad558c659e25f5bef 100644
--- a/assets/prometheus/rules/node.rules.yaml
+++ b/assets/prometheus/rules/node.rules.yaml
@@ -2,7 +2,7 @@ groups:
 - name: node.rules
   rules:
   - record: instance:node_cpu:rate:sum
-    expr: sum(rate(node_cpu{mode!="idle",mode!="iowait",mode!~"^(?:guest.*)$"}[3m]))
+    expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[3m]))
       BY (instance)
   - record: instance:node_filesystem_usage:sum
     expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}))
@@ -12,10 +12,10 @@ groups:
   - record: instance:node_network_transmit_bytes:rate:sum
     expr: sum(rate(node_network_transmit_bytes[3m])) BY (instance)
   - record: instance:node_cpu:ratio
-    expr: sum(rate(node_cpu{mode!="idle"}[5m])) WITHOUT (cpu, mode) / ON(instance)
+    expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance)
       GROUP_LEFT() count(sum(node_cpu) BY (instance, cpu)) BY (instance)
   - record: cluster:node_cpu:sum_rate5m
-    expr: sum(rate(node_cpu{mode!="idle"}[5m]))
+    expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m]))
   - record: cluster:node_cpu:ratio
-    expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu))
+    expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu) BY (instance, cpu))
   - alert: NodeExporterDown
diff --git a/manifests/prometheus/prometheus-k8s-rules.yaml b/manifests/prometheus/prometheus-k8s-rules.yaml
index 403e438356fc12ddd426aaa04a8777825f74fde3..552aad5e708652067b94ef8b7ce0961a8602d0c8 100644
--- a/manifests/prometheus/prometheus-k8s-rules.yaml
+++ b/manifests/prometheus/prometheus-k8s-rules.yaml
@@ -498,7 +498,7 @@ data:
     - name: node.rules
       rules:
       - record: instance:node_cpu:rate:sum
-        expr: sum(rate(node_cpu{mode!="idle",mode!="iowait",mode!~"^(?:guest.*)$"}[3m]))
+        expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[3m]))
           BY (instance)
       - record: instance:node_filesystem_usage:sum
         expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}))
@@ -508,10 +508,10 @@ data:
       - record: instance:node_network_transmit_bytes:rate:sum
         expr: sum(rate(node_network_transmit_bytes[3m])) BY (instance)
       - record: instance:node_cpu:ratio
-        expr: sum(rate(node_cpu{mode!="idle"}[5m])) WITHOUT (cpu, mode) / ON(instance)
+        expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance)
           GROUP_LEFT() count(sum(node_cpu) BY (instance, cpu)) BY (instance)
       - record: cluster:node_cpu:sum_rate5m
-        expr: sum(rate(node_cpu{mode!="idle"}[5m]))
+        expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m]))
       - record: cluster:node_cpu:ratio
-        expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu))
+        expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu) BY (instance, cpu))
       - alert: NodeExporterDown