diff --git a/jsonnet/kube-prometheus/alerts/node.libsonnet b/jsonnet/kube-prometheus/alerts/node.libsonnet
index 37fff428f81d3e22fe2beeaede04ad69fbd63552..3dca1b0aad0c9b42d2fcdb30fcf32a32fa13f99d 100644
--- a/jsonnet/kube-prometheus/alerts/node.libsonnet
+++ b/jsonnet/kube-prometheus/alerts/node.libsonnet
@@ -32,6 +32,88 @@
           },
         ],
       },
+      // Clock drift alerts, based on the node_ntp_offset_seconds metric.
+      {
+        name: 'node-time',
+        rules: [
+          {
+            alert: 'ClockSkewDetected',
+            annotations: {
+              message: 'Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}. Ensure NTP is configured correctly on this host.',
+            },
+            // Fires when the offset stays outside +/-0.03 (seconds, per the metric name) for 2m.
+            expr: |||
+              node_ntp_offset_seconds{%(nodeExporterSelector)s} < -0.03 or node_ntp_offset_seconds{%(nodeExporterSelector)s} > 0.03
+            ||| % $._config,
+            'for': '2m',
+            labels: {
+              severity: 'warning',
+            },
+          },
+        ],
+      },
+      // Per-interface error and link-state alerts, restricted by hostNetworkInterfaceSelector.
+      {
+        name: 'node-network',
+        rules: [
+          {
+            alert: 'NetworkReceiveErrors',
+            annotations: {
+              message: 'Network interface "{{ $labels.device }}" showing receive errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
+            },
+            // Any non-zero receive error rate over a 2m window.
+            expr: |||
+              rate(node_network_receive_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
+            ||| % $._config,
+            'for': '2m',
+            labels: {
+              severity: 'warning',
+            },
+          },
+          {
+            alert: 'NetworkTransmitErrors',
+            annotations: {
+              message: 'Network interface "{{ $labels.device }}" showing transmit errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
+            },
+            // Any non-zero transmit error rate over a 2m window.
+            expr: |||
+              rate(node_network_transmit_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
+            ||| % $._config,
+            'for': '2m',
+            labels: {
+              severity: 'warning',
+            },
+          },
+          {
+            alert: 'NodeNetworkInterfaceDown',
+            annotations: {
+              message: 'Network interface "{{ $labels.device }}" down on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
+            },
+            // node_network_up reporting 0, sustained for 2m.
+            expr: |||
+              node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s} == 0
+            ||| % $._config,
+            'for': '2m',
+            labels: {
+              severity: 'warning',
+            },
+          },
+          {
+            alert: 'NodeNetworkInterfaceFlapping',
+            annotations: {
+              message: 'Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
+            },
+            // More than 2 changes of node_network_up within a 2m window.
+            expr: |||
+              changes(node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 2
+            ||| % $._config,
+            'for': '2m',
+            labels: {
+              severity: 'warning',
+            },
+          },
+        ],
+      },
     ],
   },
 }
diff --git a/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet b/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet
index 6b7f7f8a35a48559e137a8e39a6aca70e985bd8e..8aae5b86dda47add15145add3eae37e027fb0c91 100644
--- a/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet
+++ b/jsonnet/kube-prometheus/node-exporter/node-exporter.libsonnet
@@ -101,6 +101,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
             // Once node exporter is being released with those settings, this can be removed.
             '--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)',
             '--collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$',
+            // Enable the NTP collector; presumably the source of node_ntp_offset_seconds for the ClockSkewDetected alert (confirm).
+            '--collector.ntp',
           ]) +
           container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) +
           container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) +