From 21873e6cf8cfdffeefc6bea8e910bef4ef048194 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6ran=20P=C3=B6hner?=
 <10630407+groundhog2k@users.noreply.github.com>
Date: Fri, 18 Mar 2022 14:54:21 +0100
Subject: [PATCH] Implement support for graceful shutdown in RabbitMQ chart
 (#921)

---
 charts/rabbitmq/Chart.yaml                 |  2 +-
 charts/rabbitmq/README.md                  |  5 +--
 charts/rabbitmq/RELEASENOTES.md            |  1 +
 charts/rabbitmq/templates/scripts.yaml     | 39 ++++++++++++++++++++++
 charts/rabbitmq/templates/statefulset.yaml | 14 ++++----
 charts/rabbitmq/values.yaml                |  5 ++-
 6 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/charts/rabbitmq/Chart.yaml b/charts/rabbitmq/Chart.yaml
index f2c6afb6..e070b78d 100644
--- a/charts/rabbitmq/Chart.yaml
+++ b/charts/rabbitmq/Chart.yaml
@@ -7,6 +7,6 @@ type: application
 maintainers:
   - name: groundhog2k
 
-version: 0.4.12
+version: 0.4.13
 
 appVersion: "3.9.13"
diff --git a/charts/rabbitmq/README.md b/charts/rabbitmq/README.md
index 97f51102..83170b43 100644
--- a/charts/rabbitmq/README.md
+++ b/charts/rabbitmq/README.md
@@ -1,6 +1,6 @@
 # RabbitMQ
 
-![Version: 0.4.12](https://img.shields.io/badge/Version-0.4.12-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 3.9.13](https://img.shields.io/badge/AppVersion-3.9.13-informational?style=flat-square)
+![Version: 0.4.13](https://img.shields.io/badge/Version-0.4.13-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 3.9.13](https://img.shields.io/badge/AppVersion-3.9.13-informational?style=flat-square)
 
 ## Changelog
 
@@ -76,6 +76,7 @@ helm uninstall my-release
 | securityContext | object | `see values.yaml` | Container security context |
 | env | list | `[]` | Additional container environmment variables |
 | args | list | `[]` | Additional container command arguments |
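+| terminationGracePeriodSeconds | int | `60` | Pod termination grace period in seconds (when greater than 10, the preStop hook waits up to this value minus 10 seconds for a safe node shutdown) |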
 | rbac.create | bool | `true` | Enable creation of RBAC |
 | serviceAccount.annotations | object | `{}` | Additional service account annotations |
 | serviceAccount.create | bool | `true` | Enable service account creation |
@@ -151,7 +152,7 @@ Section to define custom services
 | authentication.user | string | `"guest"` | Initial user name |
 | authentication.password | string | `"guest"` | Initial password |
 | authentication.erlangCookie | string | `nil` | Erlang cookie (MANDATORY) (Alternative: Set the environment variable ERLANG_COOKIE) |
-| clustering.rebalance | bool | `true` | Enable rebalance queues with master when new replica is created |
+| clustering.rebalance | bool | `false` | Enable rebalancing of queues with master when a new replica is created |
 | clustering.useLongName | bool | `true` | Use FQDN for RabbitMQ node names |
 
 ## RabbitMQ memory parameters
diff --git a/charts/rabbitmq/RELEASENOTES.md b/charts/rabbitmq/RELEASENOTES.md
index c4d24a83..86437b08 100644
--- a/charts/rabbitmq/RELEASENOTES.md
+++ b/charts/rabbitmq/RELEASENOTES.md
@@ -9,4 +9,5 @@
 | 0.4.10 | 3.9.13 | Implemented support for extra secrets and advanced configuration capabilites |
 | 0.4.11 | 3.9.13 | Corrected chart documentation |
 | 0.4.12 | 3.9.13 | Fixed headless service and enabled publishNotReadyAddresses |
+| 0.4.13 | 3.9.13 | Added graceful shutdown hook |
 | | | |
diff --git a/charts/rabbitmq/templates/scripts.yaml b/charts/rabbitmq/templates/scripts.yaml
index aa3be2d4..42f2984c 100644
--- a/charts/rabbitmq/templates/scripts.yaml
+++ b/charts/rabbitmq/templates/scripts.yaml
@@ -33,3 +33,42 @@ data:
     fi
     chmod 600 /var/lib/rabbitmq/.erlang.cookie
     echo "Finished."
+  startup.sh: |
+    until rabbitmqctl cluster_status >/dev/null; do
+      echo "PostStart: Waiting for cluster readiness..." >/proc/1/fd/1
+      sleep 5
+    done
+    echo "PostStart: Rebalancing all queues.." >/proc/1/fd/1
+    rabbitmq-queues rebalance "all" >/proc/1/fd/1
+  shutdown.sh: |
+    {{- if and (.Values.terminationGracePeriodSeconds) (gt (int .Values.terminationGracePeriodSeconds) 10) }}
+    if rabbitmqctl cluster_status; then
+        echo "PreStop: Will wait up to {{ sub .Values.terminationGracePeriodSeconds 10 }} seconds for node to make sure cluster is healthy after node shutdown" >/proc/1/fd/1
+        timeout {{ sub .Values.terminationGracePeriodSeconds 10 }} /scripts/safeshutdown.sh
+    fi
+    {{- end }}
+    echo "PreStop: Stopping RabbitMQ" >/proc/1/fd/1
+    rabbitmqctl stop_app
+    echo "PreStop: RabbitMQ stopped" >/proc/1/fd/1
+    sleep 5
+  safeshutdown.sh: |
+    while true; do
+        waiting="false"
+        if ! rabbitmq-diagnostics -q check_if_node_is_mirror_sync_critical >/proc/1/fd/1; then
+            echo "PreStop: check_if_node_is_mirror_sync_critical returns error. Continuing to wait" >/proc/1/fd/1
+            waiting="true"
+        else
+            echo "PreStop: check_if_node_is_mirror_sync_critical returns o.k." >/proc/1/fd/1
+        fi
+        if ! rabbitmq-diagnostics -q check_if_node_is_quorum_critical >/proc/1/fd/1; then
+            echo "PreStop: check_if_node_is_quorum_critical returns error. Continuing to wait" >/proc/1/fd/1
+            waiting="true"
+        else
+            echo "PreStop: check_if_node_is_quorum_critical returns o.k." >/proc/1/fd/1
+        fi
+        if [ "$waiting" = "true" ]; then
+            sleep 1
+        else
+            break
+        fi
+    done
diff --git a/charts/rabbitmq/templates/statefulset.yaml b/charts/rabbitmq/templates/statefulset.yaml
index 24d4486c..d65e7725 100644
--- a/charts/rabbitmq/templates/statefulset.yaml
+++ b/charts/rabbitmq/templates/statefulset.yaml
@@ -74,6 +74,9 @@ spec:
       {{- with .Values.extraInitContainers }}
       {{- toYaml . | nindent 8 }}
       {{- end }}
+      {{- if or .Values.terminationGracePeriodSeconds (eq (toString .Values.terminationGracePeriodSeconds) "0") }}
+      terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           {{- with  .Values.securityContext }}
@@ -177,19 +180,14 @@ spec:
                 command:
                   - /bin/bash
                   - -ec
-                  - |
-                    until rabbitmqctl cluster_status >/dev/null; do
-                        echo "Waiting for cluster readiness..."
-                        sleep 5
-                    done
-                    rabbitmq-queues rebalance "all"
+                  - /scripts/startup.sh
           {{- end }}
             preStop:
               exec:
                 command:
                   - bash
                   - -ec
-                  - rabbitmqctl stop_app
+                  - /scripts/shutdown.sh
           env:
             - name: RABBITMQ_USE_LONGNAME
               value: {{ .Values.clustering.useLongName | quote }}
@@ -227,6 +225,8 @@ spec:
               mountPath: /var/lib/rabbitmq
             - name: config
               mountPath: /etc/rabbitmq
+            - name: scripts
+              mountPath: /scripts
             {{- if .Values.certificates.enabled }}
             - name: certs
               mountPath: /ssl
diff --git a/charts/rabbitmq/values.yaml b/charts/rabbitmq/values.yaml
index 555af3d5..3916bbbd 100644
--- a/charts/rabbitmq/values.yaml
+++ b/charts/rabbitmq/values.yaml
@@ -202,6 +202,9 @@ extraContainers: []
 ## Default Kubernetes cluster domain
 clusterDomain: cluster.local
 
+## Pod termination grace period in seconds (the preStop hook waits up to this value minus 10 seconds for a safe shutdown)
+terminationGracePeriodSeconds: 60
+
 # A list of plugins (without whitespace!) - f.e.: rabbitmq_federation_management,rabbitmq_mqtt,rabbitmq_stomp
 plugins: []
 
@@ -326,7 +329,7 @@ extraSecretAdvancedConfigs:
 ## Common (recommended) cluster settings
 clustering:
   ## Rebalance queues with master when new replica is created
-  rebalance: true
+  rebalance: false
   ## Use FQDN for RabbitMQ node names
   useLongName: true
 
-- 
GitLab