From 56f96e638955e65fed9dc3aabf08d2dcf600db8d Mon Sep 17 00:00:00 2001
From: Philip Gough <philip.p.gough@gmail.com>
Date: Fri, 24 Sep 2021 16:22:48 +0100
Subject: [PATCH] Adjust dropped metrics from cAdvisor

This change drops the container_fs_* and container_blkio_device_usage_total
cAdvisor metrics only for series that carry a non-empty 'container' label.

Previously we dropped pod-centric metrics without a (pod, namespace) label set;
however, those series can be critical for debugging.
---
 .../components/k8s-control-plane.libsonnet            | 11 +++++++++--
 manifests/kubernetes-serviceMonitorKubelet.yaml       |  7 ++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/jsonnet/kube-prometheus/components/k8s-control-plane.libsonnet b/jsonnet/kube-prometheus/components/k8s-control-plane.libsonnet
index 08cdfb20..9a55c850 100644
--- a/jsonnet/kube-prometheus/components/k8s-control-plane.libsonnet
+++ b/jsonnet/kube-prometheus/components/k8s-control-plane.libsonnet
@@ -127,9 +127,7 @@ function(params) {
               action: 'drop',
               regex: '(' + std.join('|',
                                     [
-                                      'container_fs_.*',  // add filesystem read/write data (nodes*disks*services*4)
                                       'container_spec_.*',  // everything related to cgroup specification and thus static data (nodes*services*5)
-                                      'container_blkio_device_usage_total',  // useful for containers, but not for system services (nodes*disks*services*operations*2)
                                       'container_file_descriptors',  // file descriptors limits and global numbers are exposed via (nodes*services)
                                       'container_sockets',  // used sockets in cgroup. Usually not important for system services (nodes*services)
                                       'container_threads_max',  // max number of threads in cgroup. Usually for system services it is not limited (nodes*services)
@@ -138,6 +136,15 @@ function(params) {
                                       'container_last_seen',  // not needed as system services are always running (nodes*services)
                                     ]) + ');;',
             },
+            {
+              sourceLabels: ['__name__', 'container'],
+              action: 'drop',
+              regex: '(' + std.join('|',
+                                    [  // metrics are available at slice level
+                                      'container_fs_.*',
+                                      'container_blkio_device_usage_total',
+                                    ]) + ');.+',
+            },
           ],
         },
         {
diff --git a/manifests/kubernetes-serviceMonitorKubelet.yaml b/manifests/kubernetes-serviceMonitorKubelet.yaml
index 5c6fc6ff..cded85cf 100644
--- a/manifests/kubernetes-serviceMonitorKubelet.yaml
+++ b/manifests/kubernetes-serviceMonitorKubelet.yaml
@@ -61,11 +61,16 @@ spec:
       sourceLabels:
       - __name__
     - action: drop
-      regex: (container_fs_.*|container_spec_.*|container_blkio_device_usage_total|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
+      regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
       sourceLabels:
       - __name__
       - pod
       - namespace
+    - action: drop
+      regex: (container_fs_.*|container_blkio_device_usage_total);.+
+      sourceLabels:
+      - __name__
+      - container
     path: /metrics/cadvisor
     port: https-metrics
     relabelings:
-- 
GitLab