From 4c6a06cf7e496b3f9f752efec7ad214c0b9af169 Mon Sep 17 00:00:00 2001
From: Damien Grisonnet <dgrisonn@redhat.com>
Date: Fri, 16 Apr 2021 17:48:20 +0200
Subject: [PATCH] jsonnet: make prometheus-adapter highly-available

Prometheus-adapter is a component of the monitoring stack that in most
cases needs to be highly available. For instance, we almost always want
the autoscaling pipeline to remain available, and we also want to avoid
having no available backend serving the metrics API APIService, as that
would result in both the AggregatedAPIDown alert firing and the kubectl
top command no longer working.

In order to make the adapter highly available, we need to increase its
replica count to 2, adjust the rolling update strategy, and add a pod
anti-affinity rule based on the kubernetes.io/hostname topology key to
prevent the adapters from being scheduled on the same node. The default
rolling update strategy for deployments isn't enough, as the default
maxUnavailable value of 25% is rounded down to 0 for 2 replicas. This
means that during rolling updates scheduling will fail if there aren't
more nodes than replicas, since the surge pod may not be schedulable
alongside an existing replica under the anti-affinity rule, so we set
maxUnavailable to 1 instead. As for maxSurge, the default should be
fine as it is rounded up to 1, but for clarity it might be better to
just set it to 1. For the pod anti-affinity constraint, a hard
requirement would be best, but a soft one should be good enough and fit
most use cases.
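
For reference, the rendered Deployment fragment should look roughly
like the following. The label selector and the weight are illustrative
here, not copied verbatim from the generated manifest:

  spec:
    replicas: 2
    strategy:
      rollingUpdate:
        maxSurge: 1
        maxUnavailable: 1
    template:
      spec:
        affinity:
          podAntiAffinity:
            preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                namespaces:
                - monitoring
                topologyKey: kubernetes.io/hostname
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: prometheus-adapter

With maxUnavailable at 1, one old replica can be terminated before the
surge pod is placed, freeing a node so that updates can proceed even
when the number of nodes equals the number of replicas.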

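Users who prefer a hard constraint can override the new value from
their own jsonnet; a minimal sketch, assuming the addon's 'hard' mode
applies to prometheus-adapter the same way it does to the other
components:

  local kp =
    (import 'kube-prometheus/main.libsonnet') +
    (import 'kube-prometheus/addons/anti-affinity.libsonnet') +
    {
      values+:: {
        prometheusAdapter+: {
          // Hypothetical override: require (rather than prefer) that
          // replicas land on different nodes.
          podAntiAffinity: 'hard',
        },
      },
    };

  { 'prometheus-adapter-deployment': kp.prometheusAdapter.deployment }
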
Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
---
 .../addons/anti-affinity.libsonnet            | 36 ++++++++++++-------
 .../components/prometheus-adapter.libsonnet   |  5 +--
 manifests/prometheus-adapter-deployment.yaml  |  4 +--
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/jsonnet/kube-prometheus/addons/anti-affinity.libsonnet b/jsonnet/kube-prometheus/addons/anti-affinity.libsonnet
index 56ea5303..d7a48107 100644
--- a/jsonnet/kube-prometheus/addons/anti-affinity.libsonnet
+++ b/jsonnet/kube-prometheus/addons/anti-affinity.libsonnet
@@ -12,18 +12,18 @@
       podAntiAffinity: 'soft',
       podAntiAffinityTopologyKey: 'kubernetes.io/hostname',
     },
+    prometheusAdapter+: {
+      podAntiAffinity: 'soft',
+      podAntiAffinityTopologyKey: 'kubernetes.io/hostname',
+    },
   },
 
-  local antiaffinity(key, values, namespace, type, topologyKey) = {
+  local antiaffinity(labelSelector, namespace, type, topologyKey) = {
     local podAffinityTerm = {
       namespaces: [namespace],
       topologyKey: topologyKey,
       labelSelector: {
-        matchExpressions: [{
-          key: key,
-          operator: 'In',
-          values: values,
-        }],
+        matchLabels: labelSelector,
       },
     },
 
@@ -45,8 +45,7 @@
     alertmanager+: {
       spec+:
         antiaffinity(
-          'alertmanager',
-          [$.values.alertmanager.name],
+          $.alertmanager.config.selectorLabels,
           $.values.common.namespace,
           $.values.alertmanager.podAntiAffinity,
           $.values.alertmanager.podAntiAffinityTopologyKey,
@@ -58,8 +57,7 @@
     prometheus+: {
       spec+:
         antiaffinity(
-          'prometheus',
-          [$.values.prometheus.name],
+          $.prometheus.config.selectorLabels,
           $.values.common.namespace,
           $.values.prometheus.podAntiAffinity,
           $.values.prometheus.podAntiAffinityTopologyKey,
@@ -73,8 +71,7 @@
         template+: {
           spec+:
             antiaffinity(
-              'app.kubernetes.io/name',
-              ['blackbox-exporter'],
+              $.blackboxExporter.config.selectorLabels,
               $.values.common.namespace,
               $.values.blackboxExporter.podAntiAffinity,
               $.values.blackboxExporter.podAntiAffinityTopologyKey,
@@ -84,4 +81,19 @@
     },
   },
 
+  prometheusAdapter+: {
+    deployment+: {
+      spec+: {
+        template+: {
+          spec+:
+            antiaffinity(
+              $.prometheusAdapter.config.selectorLabels,
+              $.values.common.namespace,
+              $.values.prometheusAdapter.podAntiAffinity,
+              $.values.prometheusAdapter.podAntiAffinityTopologyKey,
+            ),
+        },
+      },
+    },
+  },
 }
diff --git a/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet b/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet
index 33b95d13..341a2f5a 100644
--- a/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet
+++ b/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet
@@ -8,6 +8,7 @@ local defaults = {
     requests: { cpu: '102m', memory: '180Mi' },
     limits: { cpu: '250m', memory: '180Mi' },
   },
+  replicas: 2,
   listenAddress: '127.0.0.1',
   port: 9100,
   commonLabels:: {
@@ -162,12 +163,12 @@ function(params) {
         labels: pa._config.commonLabels,
       },
       spec: {
-        replicas: 1,
+        replicas: pa._config.replicas,
         selector: { matchLabels: pa._config.selectorLabels },
         strategy: {
           rollingUpdate: {
             maxSurge: 1,
-            maxUnavailable: 0,
+            maxUnavailable: 1,
           },
         },
         template: {
diff --git a/manifests/prometheus-adapter-deployment.yaml b/manifests/prometheus-adapter-deployment.yaml
index 92740436..787e18fb 100644
--- a/manifests/prometheus-adapter-deployment.yaml
+++ b/manifests/prometheus-adapter-deployment.yaml
@@ -9,7 +9,7 @@ metadata:
   name: prometheus-adapter
   namespace: monitoring
 spec:
-  replicas: 1
+  replicas: 2
   selector:
     matchLabels:
       app.kubernetes.io/component: metrics-adapter
@@ -18,7 +18,7 @@ spec:
   strategy:
     rollingUpdate:
       maxSurge: 1
-      maxUnavailable: 0
+      maxUnavailable: 1
   template:
     metadata:
       labels:
-- 
GitLab