diff --git a/docs/developing-prometheus-rules-and-grafana-dashboards.md b/docs/developing-prometheus-rules-and-grafana-dashboards.md index 1a9b3141436d19acb3492b5eaa14d2487630f6eb..56e7c297eb7bc9aaaff3c15fcc32ce50fee314f1 100644 --- a/docs/developing-prometheus-rules-and-grafana-dashboards.md +++ b/docs/developing-prometheus-rules-and-grafana-dashboards.md @@ -219,72 +219,113 @@ local kp = (import 'kube-prometheus/main.libsonnet') + { ``` ### Changing default rules -Along with adding additional rules, we give the user the option to filter or adjust the existing rules imported by `kube-prometheus/kube-prometheus.libsonnet`. The recording rules can be found in [kube-prometheus/rules](../jsonnet/kube-prometheus/rules) and [kubernetes-mixin/rules](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/rules) while the alerting rules can be found in [kube-prometheus/alerts](../jsonnet/kube-prometheus/alerts) and [kubernetes-mixin/alerts](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/alerts). +Along with adding additional rules, we give the user the option to filter or adjust the existing rules imported by `kube-prometheus/main.libsonnet`. The recording rules can be found in [kube-prometheus/components/mixin/rules](../jsonnet/kube-prometheus/components/mixin/rules) and [kubernetes-mixin/rules](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/rules) while the alerting rules can be found in [kube-prometheus/components/mixin/alerts](../jsonnet/kube-prometheus/components/mixin/alerts) and [kubernetes-mixin/alerts](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/alerts). Knowing which rules to change, the user can now use functions from the [Jsonnet standard library](https://jsonnet.org/ref/stdlib.html) to make these changes. Below are examples of both a filter and an adjustment being made to the default rules. 
These changes can be assigned to a local variable and then added to the `local kp` object as seen in the examples above. #### Filter -Here the alert `KubeStatefulSetReplicasMismatch` is being filtered out of the group `kubernetes-apps`. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet). +Here the alert `KubeStatefulSetReplicasMismatch` is being filtered out of the group `kubernetes-apps`. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet). You first need to find out in which component the rule is defined (here it is `kubernetesControlPlane`). ```jsonnet local filter = { - prometheusAlerts+:: { - groups: std.map( - function(group) - if group.name == 'kubernetes-apps' then - group { - rules: std.filter(function(rule) - rule.alert != "KubeStatefulSetReplicasMismatch", - group.rules - ) - } - else - group, - super.groups - ), + kubernetesControlPlane+: { + prometheusRule+:: { + spec+: { + groups: std.map( + function(group) + if group.name == 'kubernetes-apps' then + group { + rules: std.filter( + function(rule) + rule.alert != 'KubeStatefulSetReplicasMismatch', + group.rules + ), + } + else + group, + super.groups + ), + }, + }, }, }; ``` + #### Adjustment -Here the expression for the alert used above is updated from its previous value. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet). +Here the expression for another alert in the same component is updated from its previous value. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet). 
```jsonnet local update = { - prometheusAlerts+:: { - groups: std.map( - function(group) - if group.name == 'kubernetes-apps' then - group { - rules: std.map( - function(rule) - if rule.alert == "KubeStatefulSetReplicasMismatch" then - rule { - expr: "kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\",statefulset!=\"vault\"} != kube_statefulset_status_replicas{job=\"kube-state-metrics\",statefulset!=\"vault\"}" - } - else - rule, - group.rules - ) - } - else - group, - super.groups - ), + kubernetesControlPlane+: { + prometheusRule+:: { + spec+: { + groups: std.map( + function(group) + if group.name == 'kubernetes-apps' then + group { + rules: std.map( + function(rule) + if rule.alert == 'KubePodCrashLooping' then + rule { + expr: 'rate(kube_pod_container_status_restarts_total{namespace="kube-system",job="kube-state-metrics"}[10m]) * 60 * 5 > 0', + } + else + rule, + group.rules + ), + } + else + group, + super.groups + ), + }, + }, }, }; ``` + Using the example from above about adding in pre-rendered rules, the new local variables can be added in as follows: ```jsonnet -local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + filter + update + { - prometheusAlerts+:: (import 'existingrule.json'), +local add = { + exampleApplication:: { + prometheusRule+: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'PrometheusRule', + metadata: { + name: 'example-application-rules', + namespace: $.values.common.namespace, + }, + spec: (import 'existingrule.json'), + }, + }, }; - -{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + -{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } + +local kp = (import 'kube-prometheus/main.libsonnet') + filter + update + add; +local kp = (import 'kube-prometheus/main.libsonnet') + + filter + + update + + add + { + values+:: { + common+: { + namespace: 'monitoring', + }, + }, + }; +{ 
'setup/0namespace-namespace': kp.kubePrometheus.namespace } + +{ + ['setup/prometheus-operator-' + name]: kp.prometheusOperator[name] + for name in std.filter((function(name) name != 'serviceMonitor' && name != 'prometheusRule'), std.objectFields(kp.prometheusOperator)) +} + +// serviceMonitor and prometheusRule are separated so that they can be created after the CRDs are ready +{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } + +{ 'prometheus-operator-prometheusRule': kp.prometheusOperator.prometheusRule } + +{ 'kube-prometheus-prometheusRule': kp.kubePrometheus.prometheusRule } + { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['blackbox-exporter-' + name]: kp.blackboxExporter[name] for name in std.objectFields(kp.blackboxExporter) } + { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + { ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + { ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + -{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + +{ ['kubernetes-' + name]: kp.kubernetesControlPlane[name] for name in std.objectFields(kp.kubernetesControlPlane) } + +{ ['exampleApplication-' + name]: kp.exampleApplication[name] for name in std.objectFields(kp.exampleApplication) } ``` ## Dashboards diff --git a/examples/changing-default-rules.libsonnet b/examples/changing-default-rules.libsonnet new file mode 100644 index 0000000000000000000000000000000000000000..1a0c192b9041da8d15af7f6b6db1a75efda49acc --- /dev/null +++ b/examples/changing-default-rules.libsonnet @@ -0,0 +1,92 @@ +local filter = { + 
kubernetesControlPlane+: { + prometheusRule+:: { + spec+: { + groups: std.map( + function(group) + if group.name == 'kubernetes-apps' then + group { + rules: std.filter( + function(rule) + rule.alert != 'KubeStatefulSetReplicasMismatch', + group.rules + ), + } + else + group, + super.groups + ), + }, + }, + }, +}; +local update = { + kubernetesControlPlane+: { + prometheusRule+:: { + spec+: { + groups: std.map( + function(group) + if group.name == 'kubernetes-apps' then + group { + rules: std.map( + function(rule) + if rule.alert == 'KubePodCrashLooping' then + rule { + expr: 'rate(kube_pod_container_status_restarts_total{namespace="kube-system",job="kube-state-metrics"}[10m]) * 60 * 5 > 0', + } + else + rule, + group.rules + ), + } + else + group, + super.groups + ), + }, + }, + }, +}; + +local add = { + exampleApplication:: { + prometheusRule+: { + apiVersion: 'monitoring.coreos.com/v1', + kind: 'PrometheusRule', + metadata: { + name: 'example-application-rules', + namespace: $.values.common.namespace, + }, + spec: (import 'existingrule.json'), + }, + }, +}; +local kp = (import 'kube-prometheus/main.libsonnet') + + filter + + update + + add + { + values+:: { + common+: { + namespace: 'monitoring', + }, + }, +}; + +{ 'setup/0namespace-namespace': kp.kubePrometheus.namespace } + +{ + ['setup/prometheus-operator-' + name]: kp.prometheusOperator[name] + for name in std.filter((function(name) name != 'serviceMonitor' && name != 'prometheusRule'), std.objectFields(kp.prometheusOperator)) +} + +// serviceMonitor and prometheusRule are separated so that they can be created after the CRDs are ready +{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } + +{ 'prometheus-operator-prometheusRule': kp.prometheusOperator.prometheusRule } + +{ 'kube-prometheus-prometheusRule': kp.kubePrometheus.prometheusRule } + +{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } + +{ ['blackbox-exporter-' + name]: 
kp.blackboxExporter[name] for name in std.objectFields(kp.blackboxExporter) } + +{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } + +{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } + +{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } + +{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } + +{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } + +{ ['kubernetes-' + name]: kp.kubernetesControlPlane[name] for name in std.objectFields(kp.kubernetesControlPlane) } + +{ ['exampleApplication-' + name]: kp.exampleApplication[name] for name in std.objectFields(kp.exampleApplication) }