diff --git a/jsonnet/kube-prometheus/kube-prometheus.libsonnet b/jsonnet/kube-prometheus/kube-prometheus.libsonnet index 044d27fcb470652942464953ef1f4c6acee7d684..58392cc018a58d2472375c1e1574a7e74a21d4ba 100644 --- a/jsonnet/kube-prometheus/kube-prometheus.libsonnet +++ b/jsonnet/kube-prometheus/kube-prometheus.libsonnet @@ -6,17 +6,12 @@ local kubeStateMetrics = import './kube-state-metrics/kube-state-metrics.libsonn local nodeExporter = import './node-exporter/node-exporter.libsonnet'; local prometheusAdapter = import './prometheus-adapter/prometheus-adapter.libsonnet'; +local monitoringMixins = import './mixins/monitoring-mixins.libsonnet'; + (import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet') + -(import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-metrics-mixin/mixin.libsonnet') + -(import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') + -(import 'github.com/prometheus/alertmanager/doc/alertmanager-mixin/mixin.libsonnet') + (import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') + -(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') + (import './prometheus/prometheus.libsonnet') + -(import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') + -(import 'github.com/prometheus/prometheus/documentation/prometheus-mixin/mixin.libsonnet') + -(import './alerts/alerts.libsonnet') + -(import './rules/rules.libsonnet') + + { alertmanager: alertmanager({ name: 'main', @@ -45,6 +40,11 @@ local prometheusAdapter = import './prometheus-adapter/prometheus-adapter.libson image: 'directxman12/k8s-prometheus-adapter:v0.8.2', prometheusURL: 'http://prometheus-' + $._config.prometheus.name + '.' + $._config.namespace + '.svc.cluster.local:9090/', }), + mixins+:: monitoringMixins({ + namespace: $._config.namespace, + alertmanagerName: 'main', + prometheusName: 'k8s', + }), kubePrometheus+:: { namespace: { apiVersion: 'v1', @@ -176,40 +176,6 @@ local prometheusAdapter = import './prometheus-adapter/prometheus-adapter.libson 'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305', ], - runbookURLPattern: 'https://github.com/prometheus-operator/kube-prometheus/wiki/%s', - - cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"', - kubeletSelector: 'job="kubelet", metrics_path="/metrics"', - kubeStateMetricsSelector: 'job="kube-state-metrics"', - nodeExporterSelector: 'job="node-exporter"', - fsSpaceFillingUpCriticalThreshold: 15, - notKubeDnsSelector: 'job!="kube-dns"', - kubeSchedulerSelector: 'job="kube-scheduler"', - kubeControllerManagerSelector: 'job="kube-controller-manager"', - kubeApiserverSelector: 'job="apiserver"', - coreDNSSelector: 'job="kube-dns"', - podLabel: 'pod', - - alertmanagerName: '{{ $labels.namespace }}/{{ $labels.pod}}', - alertmanagerClusterLabels: 'namespace,service', - alertmanagerSelector: 'job="alertmanager-' + $._config.alertmanager.name + '",namespace="' + $._config.namespace + '"', - prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"', - prometheusName: '{{$labels.namespace}}/{{$labels.pod}}', - prometheusOperatorSelector: 'job="prometheus-operator",namespace="' + $._config.namespace + '"', - - jobs: { - Kubelet: $._config.kubeletSelector, - KubeScheduler: $._config.kubeSchedulerSelector, - KubeControllerManager: $._config.kubeControllerManagerSelector, - KubeAPI: $._config.kubeApiserverSelector, - KubeStateMetrics: $._config.kubeStateMetricsSelector, - NodeExporter: $._config.nodeExporterSelector, - Alertmanager: $._config.alertmanagerSelector, - Prometheus: $._config.prometheusSelector, - PrometheusOperator: $._config.prometheusOperatorSelector, - CoreDNS: $._config.coreDNSSelector, - }, - resources+:: { 'addon-resizer': { requests: { cpu: '10m', memory: '30Mi' }, @@ -220,7 +186,25 @@ local prometheusAdapter = import './prometheus-adapter/prometheus-adapter.libson limits: { cpu: '20m', memory: '40Mi' }, }, }, - prometheus+:: { rules: $.prometheusRules + $.prometheusAlerts }, + + local allRules = + $.mixins.nodeExporter.prometheusRules + + $.mixins.kubernetes.prometheusRules + + $.mixins.base.prometheusRules + + $.mixins.kubeStateMetrics.prometheusAlerts + + $.mixins.nodeExporter.prometheusAlerts + + $.mixins.alertmanager.prometheusAlerts + + $.mixins.prometheusOperator.prometheusAlerts + + $.mixins.kubernetes.prometheusAlerts + + $.mixins.prometheus.prometheusAlerts + + $.mixins.base.prometheusAlerts, + + local allDashboards = + $.mixins.nodeExporter.grafanaDashboards + + $.mixins.kubernetes.grafanaDashboards + + $.mixins.prometheus.grafanaDashboards, + + prometheus+:: { rules: allRules }, grafana+:: { labels: { 'app.kubernetes.io/name': 'grafana', @@ -228,7 +212,7 @@ local prometheusAdapter = import './prometheus-adapter/prometheus-adapter.libson 'app.kubernetes.io/component': 'grafana', 'app.kubernetes.io/part-of': 'kube-prometheus', }, - dashboards: $.grafanaDashboards, + dashboards: allDashboards, }, }, } diff --git a/jsonnet/kube-prometheus/mixins/monitoring-mixins.libsonnet b/jsonnet/kube-prometheus/mixins/monitoring-mixins.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..bb03583770bd819ee9a3aa62fa43881fe4ff4e42 --- /dev/null +++ b/jsonnet/kube-prometheus/mixins/monitoring-mixins.libsonnet @@ -0,0 +1,77 @@ +local defaults = { + namespace: error 'must provide namespace', + prometheusName: error 'must provide Prometheus resource name', + alertmanagerName: error 'must provide Alertmanager resource name', +}; + +function(params) { + local m = self, + config:: defaults + params, + base+: + (import '../alerts/general.libsonnet') + + (import '../alerts/node.libsonnet') + + (import '../rules/node-rules.libsonnet') + + (import '../rules/general.libsonnet') { + _config+:: { + nodeExporterSelector: 'job="node-exporter"', + hostNetworkInterfaceSelector: 'device!~"veth.+"', + }, + }, + + kubernetes: + (import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') { + _config+:: { + cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"', + kubeletSelector: 'job="kubelet", metrics_path="/metrics"', + kubeStateMetricsSelector: 'job="kube-state-metrics"', + nodeExporterSelector: 'job="node-exporter"', + kubeSchedulerSelector: 'job="kube-scheduler"', + kubeControllerManagerSelector: 'job="kube-controller-manager"', + kubeApiserverSelector: 'job="apiserver"', + podLabel: 'pod', + runbookURLPattern: 'https://github.com/prometheus-operator/kube-prometheus/wiki/%s', + diskDeviceSelector: 'device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"', + hostNetworkInterfaceSelector: 'device!~"veth.+"', + }, + }, + + kubeStateMetrics: + (import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-metrics-mixin/mixin.libsonnet') { + _config+:: { + kubeStateMetricsSelector: 'job="kube-state-metrics"', + }, + }, + + prometheusOperator: + (import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') { + _config+:: { + prometheusOperatorSelector: 'job="prometheus-operator",namespace="' + m.config.namespace + '"', + }, + }, + + prometheus: + (import 'github.com/prometheus/prometheus/documentation/prometheus-mixin/mixin.libsonnet') { + _config+:: { + prometheusSelector: 'job="prometheus-' + m.config.prometheusName + '",namespace="' + m.config.namespace + '"', + prometheusName: '{{$labels.namespace}}/{{$labels.pod}}', + }, + }, + + alertmanager: + (import 'github.com/prometheus/alertmanager/doc/alertmanager-mixin/mixin.libsonnet') { + _config+:: { + alertmanagerName: '{{ $labels.namespace }}/{{ $labels.pod}}', + alertmanagerClusterLabels: 'namespace,service', + alertmanagerSelector: 'job="alertmanager-' + m.config.alertmanagerName + '",namespace="' + m.config.namespace + '"', + }, + }, + + nodeExporter: + (import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') { + _config+:: { + nodeExporterSelector: 'job="node-exporter"', + fsSpaceFillingUpCriticalThreshold: 15, + diskDeviceSelector: 'device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"', + }, + }, +} diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index ed5c20a48bf0f46f874846e1f6c637aeb1db8a9d..e5e62bc938ee47e8af8ea904c18a20c61e413950 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -22243,7 +22243,7 @@ items: "timeShift": null, "title": "CPU Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -22329,7 +22329,7 @@ items: "timeShift": null, "title": "CPU Saturation (load1 per CPU)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -22427,7 +22427,7 @@ items: "timeShift": null, "title": "Memory Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -22513,7 +22513,7 @@ items: "timeShift": null, "title": "Memory Saturation (Major Page Faults)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -22627,7 +22627,7 @@ items: "timeShift": null, "title": "Net Utilisation (Bytes Receive/Transmit)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -22729,7 +22729,7 @@ items: "timeShift": null, "title": "Net Saturation (Drops Receive/Transmit)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -22827,7 +22827,7 @@ items: "timeShift": null, "title": "Disk IO Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -22913,7 +22913,7 @@ items: "timeShift": null, "title": "Disk IO Saturation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23011,7 +23011,7 @@ items: "timeShift": null, "title": "Disk Space Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23107,9 +23107,9 @@ items: "30d" ] }, - "timezone": "UTC", + "timezone": "utc", "title": "USE Method / Cluster", - "uid": "3e97d1d02672cdd0861f4c97c64f89b2", + "uid": "", "version": 0 } kind: ConfigMap @@ -23191,7 +23191,7 @@ items: "timeShift": null, "title": "CPU Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23277,7 +23277,7 @@ items: "timeShift": null, "title": "CPU Saturation (Load1 per CPU)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23375,7 +23375,7 @@ items: "timeShift": null, "title": "Memory Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23461,7 +23461,7 @@ items: "timeShift": null, "title": "Memory Saturation (Major Page Faults)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23575,7 +23575,7 @@ items: "timeShift": null, "title": "Net Utilisation (Bytes Receive/Transmit)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23677,7 +23677,7 @@ items: "timeShift": null, "title": "Net Saturation (Drops Receive/Transmit)", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23775,7 +23775,7 @@ items: "timeShift": null, "title": "Disk IO Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23861,7 +23861,7 @@ items: "timeShift": null, "title": "Disk IO Saturation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -23959,7 +23959,7 @@ items: "timeShift": null, "title": "Disk Space Utilisation", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -24082,9 +24082,9 @@ items: "30d" ] }, - "timezone": "UTC", + "timezone": "utc", "title": "USE Method / Node", - "uid": "fac67cfbe174d3ef53eb473d73d9212f", + "uid": "", "version": 0 } kind: ConfigMap @@ -24181,7 +24181,7 @@ items: "timeShift": null, "title": "CPU Usage", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -24295,7 +24295,7 @@ items: "timeShift": null, "title": "Load Average", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -24422,7 +24422,7 @@ items: "timeShift": null, "title": "Memory Usage", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -24525,9 +24525,6 @@ items: ], "thresholds": "80, 90", "title": "Memory Usage", - "tooltip": { - "shared": false - }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -24636,7 +24633,7 @@ items: "timeShift": null, "title": "Disk I/O", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -24743,7 +24740,7 @@ items: "timeShift": null, "title": "Disk Space Usage", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -24850,7 +24847,7 @@ items: "timeShift": null, "title": "Network Received", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -24944,7 +24941,7 @@ items: "timeShift": null, "title": "Network Transmitted", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -25067,9 +25064,8 @@ items: "30d" ] }, - "timezone": "UTC", + "timezone": "browser", "title": "Nodes", - "uid": "fa49a4706d07a042595b664c87fb33ea", "version": 0 } kind: ConfigMap diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index 99e178244e53a754d0746ea3c3c43ba327459705..85d3f68d5f1a3cec0078871f6006716aea1b2b8f 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -13,7 +13,7 @@ spec: template: metadata: annotations: - checksum/grafana-dashboards: ce13f0b50d04c73fb01da858eb1fb608 + checksum/grafana-dashboards: a9e19e1ab605dc374f30edda771e6917 checksum/grafana-datasources: 48faab41f579fc8efde6034391496f6a labels: app: grafana