From 8812e45501bff47ece13cd286a41c9daa8e2119e Mon Sep 17 00:00:00 2001 From: Damien Grisonnet <dgrisonn@redhat.com> Date: Wed, 30 Jun 2021 16:53:02 +0200 Subject: [PATCH] jsonnet: readjust prometheus-adapter intervals Previously, the prometheus-adapter configuration didn't take into account the scrape intervals of kubelet, node-exporter and windows-exporter, which led to stale results, and even negative results from the CPU queries when the irate() function was extrapolating data. To fix that, we want to set the interval used in the irate() function in the CPU queries to 4x the scrape interval in order to extrapolate data between the last two scrapes. This will improve the freshness of the exposed CPU usage and prevent incorrect extrapolations. Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com> --- .../components/prometheus-adapter.libsonnet | 15 +++++++++++++-- jsonnet/kube-prometheus/lib/utils.libsonnet | 7 +++++++ jsonnet/kube-prometheus/main.libsonnet | 6 ++++++ manifests/prometheus-adapter-configMap.yaml | 6 ++++-- 4 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 jsonnet/kube-prometheus/lib/utils.libsonnet diff --git a/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet b/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet index 8312183c..eb708c0d 100644 --- a/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet +++ b/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet @@ -22,13 +22,24 @@ local defaults = { for labelName in std.objectFields(defaults.commonLabels) if !std.setMember(labelName, ['app.kubernetes.io/version']) }, + // Default range intervals are equal to 4 times the default scrape interval. + // This is done in order to follow the Prometheus rule of thumb for irate(). 
+ rangeIntervals: { + kubelet: '4m', + nodeExporter: '4m', + windowsExporter: '4m', + }, prometheusURL: error 'must provide prometheusURL', config: { resourceRules: { cpu: { - containerQuery: 'sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[5m])) by (<<.GroupBy>>)', - nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)', + containerQuery: ||| + sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[%(kubelet)s])) by (<<.GroupBy>>) + ||| % $.rangeIntervals, + nodeQuery: ||| + sum(1 - irate(node_cpu_seconds_total{mode="idle"}[%(nodeExporter)s]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[%(windowsExporter)s])) by (<<.GroupBy>>) + ||| % $.rangeIntervals, resources: { overrides: { node: { resource: 'node' }, diff --git a/jsonnet/kube-prometheus/lib/utils.libsonnet b/jsonnet/kube-prometheus/lib/utils.libsonnet new file mode 100644 index 00000000..b5d29825 --- /dev/null +++ b/jsonnet/kube-prometheus/lib/utils.libsonnet @@ -0,0 +1,7 @@ +{ + // rangeInterval takes a scrape interval and converts it to a range interval + // following the Prometheus rule of thumb for rate() and irate(). 
+ rangeInterval(i='1m'): + local interval = std.parseInt(std.substr(i, 0, std.length(i) - 1)); + interval * 4 + i[std.length(i) - 1], +} diff --git a/jsonnet/kube-prometheus/main.libsonnet b/jsonnet/kube-prometheus/main.libsonnet index e1dc0b59..903c44b3 100644 --- a/jsonnet/kube-prometheus/main.libsonnet +++ b/jsonnet/kube-prometheus/main.libsonnet @@ -11,6 +11,8 @@ local prometheus = import './components/prometheus.libsonnet'; local platformPatch = import './platforms/platforms.libsonnet'; +local utils = import './lib/utils.libsonnet'; + { // using `values` as this is similar to helm values:: { @@ -97,6 +99,10 @@ local platformPatch = import './platforms/platforms.libsonnet'; version: $.values.common.versions.prometheusAdapter, image: $.values.common.images.prometheusAdapter, prometheusURL: 'http://prometheus-' + $.values.prometheus.name + '.' + $.values.common.namespace + '.svc.cluster.local:9090/', + rangeIntervals+: { + kubelet: utils.rangeInterval($.kubernetesControlPlane.serviceMonitorKubelet.spec.endpoints[0].interval), + nodeExporter: utils.rangeInterval($.nodeExporter.serviceMonitor.spec.endpoints[0].interval), + }, }, prometheusOperator: { namespace: $.values.common.namespace, diff --git a/manifests/prometheus-adapter-configMap.yaml b/manifests/prometheus-adapter-configMap.yaml index 64ee1079..899a0e7e 100644 --- a/manifests/prometheus-adapter-configMap.yaml +++ b/manifests/prometheus-adapter-configMap.yaml @@ -4,8 +4,10 @@ data: "resourceRules": "cpu": "containerLabel": "container" - "containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)" - "nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode=\"idle\", job=\"windows-exporter\",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)" + "containerQuery": | + 
sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s])) by (<<.GroupBy>>) + "nodeQuery": | + sum(1 - irate(node_cpu_seconds_total{mode="idle"}[60s]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[4m])) by (<<.GroupBy>>) "resources": "overrides": "namespace": -- GitLab