From 7363e20b65e2dcd178d8dcbece796f13be5822fb Mon Sep 17 00:00:00 2001 From: Joao Marcal <jmarcal@redhat.com> Date: Tue, 14 Mar 2023 16:39:30 +0100 Subject: [PATCH] Adds startupProbe to prometheus-adapter (#2029) Issue: https://issues.redhat.com/browse/OCPBUGS-7694 Problem: in clusters with a large number of CRDs deployed, prometheus-adapter takes too long to discover all of them, which makes it fail the livenessProbe. Solution: introduce a startupProbe that gives prometheus-adapter 3 minutes (failureThreshold 18 x periodSeconds 10) to initialize. Signed-off-by: JoaoBraveCoding <jmarcal@redhat.com> --- .../components/prometheus-adapter.libsonnet | 11 +++++++++-- manifests/prometheusAdapter-deployment.yaml | 9 +++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet b/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet index f83601cf..bf620fc6 100644 --- a/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet +++ b/jsonnet/kube-prometheus/components/prometheus-adapter.libsonnet @@ -236,13 +236,21 @@ function(params) { '--tls-cipher-suites=' + std.join(',', pa._config.tlsCipherSuites), ], resources: pa._config.resources, + startupProbe: { + httpGet: { + path: '/livez', + port: 'https', + scheme: 'HTTPS', + }, + periodSeconds: 10, + failureThreshold: 18, + }, readinessProbe: { httpGet: { path: '/readyz', port: 'https', scheme: 'HTTPS', }, - initialDelaySeconds: 30, periodSeconds: 5, failureThreshold: 5, }, @@ -252,7 +260,6 @@ function(params) { port: 'https', scheme: 'HTTPS', }, - initialDelaySeconds: 30, periodSeconds: 5, failureThreshold: 5, }, diff --git a/manifests/prometheusAdapter-deployment.yaml b/manifests/prometheusAdapter-deployment.yaml index fcfdf356..ac196193 100644 --- a/manifests/prometheusAdapter-deployment.yaml +++ b/manifests/prometheusAdapter-deployment.yaml @@ -44,7 +44,6 @@ spec: path: /livez port: https scheme: HTTPS - initialDelaySeconds: 30 periodSeconds: 5 name: prometheus-adapter ports: @@ -56,7 
+55,6 @@ spec: path: /readyz port: https scheme: HTTPS - initialDelaySeconds: 30 periodSeconds: 5 resources: limits: @@ -71,6 +69,13 @@ spec: drop: - ALL readOnlyRootFilesystem: true + startupProbe: + failureThreshold: 18 + httpGet: + path: /livez + port: https + scheme: HTTPS + periodSeconds: 10 volumeMounts: - mountPath: /tmp name: tmpfs -- GitLab