From 788c82860a5cce060cc04eb67f5e5f5e1d7d6789 Mon Sep 17 00:00:00 2001
From: Frederic Branczyk <fbranczyk@gmail.com>
Date: Wed, 10 Apr 2019 17:43:24 +0200
Subject: [PATCH] kube-prometheus: Re-generate

---
Notes (ignored by git am):
  * Bumps the kube-prometheus jsonnet dependency pin, passes
    --collector.ntp to node-exporter, and adds the node-time and
    node-network alert groups to the Prometheus rules manifest.
  * The four node-network alert messages below were hand-corrected:
    the unbalanced trailing double quote after "{{ $labels.pod }}" was
    removed and "it's" was changed to "its". These manifests are
    generated, so mirror the wording fix in the jsonnet source or the
    next re-generation will bring the old text back.

 jsonnetfile.lock.json                  |  2 +-
 manifests/node-exporter-daemonset.yaml |  1 +
 manifests/prometheus-rules.yaml        | 49 ++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json
index 6b64adb0..ed5c26cc 100644
--- a/jsonnetfile.lock.json
+++ b/jsonnetfile.lock.json
@@ -8,7 +8,7 @@
                     "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
                 }
             },
-            "version": "3ba7822228654f3bc864a7c37139665c7549739a"
+            "version": "82817c8f9277c82ca164a6ef75bf476e56f24521"
         },
         {
             "name": "ksonnet",
diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml
index f0729b5d..56e4b90b 100644
--- a/manifests/node-exporter-daemonset.yaml
+++ b/manifests/node-exporter-daemonset.yaml
@@ -22,6 +22,7 @@ spec:
         - --path.rootfs=/host/root
         - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
         - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
+        - --collector.ntp
         image: quay.io/prometheus/node-exporter:v0.17.0
         name: node-exporter
         resources:
diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml
index e4607322..ee1f21ba 100644
--- a/manifests/prometheus-rules.yaml
+++ b/manifests/prometheus-rules.yaml
@@ -931,6 +931,55 @@ spec:
       for: 10m
       labels:
         severity: critical
+  - name: node-time
+    rules:
+    - alert: ClockSkewDetected
+      annotations:
+        message: Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod
+          }}. Ensure NTP is configured correctly on this host.
+      expr: |
+        node_ntp_offset_seconds{job="node-exporter"} < -0.03 or node_ntp_offset_seconds{job="node-exporter"} > 0.03
+      for: 2m
+      labels:
+        severity: warning
+  - name: node-network
+    rules:
+    - alert: NetworkReceiveErrors
+      annotations:
+        message: Network interface "{{ $labels.device }}" showing receive errors on
+          node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
+      expr: |
+        rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
+      for: 2m
+      labels:
+        severity: warning
+    - alert: NetworkTransmitErrors
+      annotations:
+        message: Network interface "{{ $labels.device }}" showing transmit errors
+          on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
+      expr: |
+        rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
+      for: 2m
+      labels:
+        severity: warning
+    - alert: NodeNetworkInterfaceDown
+      annotations:
+        message: Network interface "{{ $labels.device }}" down on node-exporter {{
+          $labels.namespace }}/{{ $labels.pod }}
+      expr: |
+        node_network_up{job="node-exporter",device!~"veth.+"} == 0
+      for: 2m
+      labels:
+        severity: warning
+    - alert: NodeNetworkInterfaceFlapping
+      annotations:
+        message: Network interface "{{ $labels.device }}" changing its up status
+          often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
+      expr: |
+        changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
+      for: 2m
+      labels:
+        severity: warning
   - name: prometheus.rules
     rules:
     - alert: PrometheusConfigReloadFailed
-- 
GitLab