diff --git a/charts/redis/Chart.yaml b/charts/redis/Chart.yaml index 8d0060aae313562bfd81b3ad7fe2d28a4d9f1dae..3359b925d4e42d7bc6daab7adfbc6a06969d75db 100644 --- a/charts/redis/Chart.yaml +++ b/charts/redis/Chart.yaml @@ -8,7 +8,7 @@ maintainers: - name: groundhog2k # This is the chart version -version: 0.5.10 +version: 0.6.0 # This is the version number of the application being deployed. appVersion: "7.0.7" diff --git a/charts/redis/README.md b/charts/redis/README.md index 73d286a54962e72b300255ea9e047521434b7fe5..d36e9393a91b3fc4a01ea8cfa97742305f68beb7 100644 --- a/charts/redis/README.md +++ b/charts/redis/README.md @@ -1,6 +1,6 @@ # Redis -   +   ## Changelog @@ -160,11 +160,13 @@ helm uninstall my-release | extraSentinelSecrets[].name | string | `nil` | Name of the existing K8s secret | | extraSentinelSecrets[].mountPath | string | `nil` | Mount path where the secret should be mounted into the container (f.e. /mysecretfolder) | | haMode.enabled | bool | `false` | Enable Redis high availibility mode with master-slave replication and sentinel | +| haMode.useDnsNames | bool | `false` | Use DNS names instead of Pod IPs to build the cluster | | haMode.masterGroupName | string | `"redisha"` | Mandatory redis HA-master group name | | haMode.replicas | int | `3` | Number of replicas (minimum should be 3) | | haMode.quorum | int | `2` | Quorum of sentinels that need to agree that a master node is not available | -| haMode.downAfterMilliseconds | int | `5000` | Number of milliseconds after the master should be declared as unavailable | -| haMode.failoverTimeout | int | `180000` | Timeout for a failover | -| haMode.parallelSyncs | int | `2` | Number of parallel reconfigurations +| haMode.downAfterMilliseconds | int | `30000` | Number of milliseconds after the master should be declared as unavailable | +| haMode.failoverTimeout | int | `180000` | Timeout for a failover in milliseconds | +| haMode.parallelSyncs | int | `1` | Number of parallel reconfigurations | 
haMode.masterAliveTestTimeout | int | `2` | Timeout in seconds to detect if Redis master is alive | -| haMode.failoverWait | int | `5` | Assumed wait time in seconds until failover should be finished | +| haMode.failoverWait | int | `35` | Assumed wait time in seconds until failover should be finished and before failover will be forced (should be greater than value of downAfterMilliseconds) | +| haMode.keepOldLogs | bool | `false` | Keep old init logs in /data/init.log after a successful initialization (use only for debugging) | diff --git a/charts/redis/RELEASENOTES.md b/charts/redis/RELEASENOTES.md index f9ed6b874364e5439a0e5eb6dd22c8a0d5acba61..173add1da9ace1328335395a23c010dd80bc5ea6 100644 --- a/charts/redis/RELEASENOTES.md +++ b/charts/redis/RELEASENOTES.md @@ -16,4 +16,5 @@ | 0.5.8 | 7.0.5 | Implemented support for image.registry option | | 0.5.9 | 7.0.6 | Upgraded to Redis 7.0.6 and added support for init container resources | | 0.5.10 | 7.0.7 | Upgraded to Redis 7.0.7 | +| 0.6.0 | 7.0.7 | Implemented alternative DNS option for cluster building | | | | | diff --git a/charts/redis/templates/scripts.yaml b/charts/redis/templates/scripts.yaml index f52cf070358de31efafc8eb2d49bcb26d10de26a..e52ec19ffbd163938e005a1eed9969e6634fcb18 100644 --- a/charts/redis/templates/scripts.yaml +++ b/charts/redis/templates/scripts.yaml @@ -6,98 +6,167 @@ metadata: {{- include "redis.labels" . | nindent 4 }} data: init.sh: | - #!/bin/sh - {{- if .Values.haMode.enabled }} - MASTER="$(timeout {{ .Values.haMode.masterAliveTestTimeout }}s redis-cli -h {{ template "redis.fullname" . }} -p {{ .Values.service.sentinelPort }} sentinel get-master-addr-by-name {{ include "redis.masterGroupName" . 
}} | grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')" - {{- end }} + #!/bin/bash REDIS_CONFIG=/data/conf/redis.conf SENTINEL_CONFIG=/data/conf/sentinel.conf + if [ -f "/data/init.log" ]; then + echo "Detected restart of this instance ($HOSTNAME)" + echo "## This was the previous log:" + cat /data/init.log + echo "## End of previous log" + fi - {{- if .Values.haMode.enabled }} - test_master_alive() { - TEST="$(timeout {{ .Values.haMode.masterAliveTestTimeout }}s redis-cli -h $MASTER -p 6379 ping)" - if [ -z "$TEST" ]; then - return 1 - fi - return 0 + # Log a message during initialization phase + # $1 - the log message + log() { + echo "$(date) $1" + echo "$(date) $1" >>/data/init.log } - {{- end }} - configure_redis() { - echo "Configuring redis server..." - rm -f $REDIS_CONFIG + # Creating redis base configuration + configure_redis_base() { + log "Creating redis base configuration" + mkdir -p /data/conf + rm -f $REDIS_CONFIG - echo "Setting redis server defaults" - echo "port 6379" >>$REDIS_CONFIG - echo "protected-mode no" >>$REDIS_CONFIG - echo "bind 0.0.0.0" >>$REDIS_CONFIG - echo "dir /data" >>$REDIS_CONFIG + log "Setting redis server defaults" + echo "port 6379" >>$REDIS_CONFIG + echo "protected-mode no" >>$REDIS_CONFIG + echo "bind 0.0.0.0" >>$REDIS_CONFIG + echo "dir /data" >>$REDIS_CONFIG + log "Finished creating base configuration" + } - {{- if .Values.haMode.enabled }} - if [ -z "$MASTER" ]; then - echo "No master found - This instance ($HOSTNAME) will be master now" - else - echo "Redis master was found with address $MASTER - Checking if master is still alive " - test_master_alive - if [ "$?" -eq "1" ]; then - echo "Dead master at address $MASTER detected! - waiting for failover" - sleep {{ .Values.haMode.failoverWait }} - test_master_alive - if [ "$?" -eq "1" ]; then - echo "Master is still dead! - forcing failover and retry pod initialization" - redis-cli -h {{ template "redis.fullname" . 
}} -p {{ .Values.service.sentinelPort }} sentinel failover {{ include "redis.masterGroupName" . }} - exit 1 - fi - fi - echo "Setting this instance ($HOSTNAME) as replicaof $MASTER" - echo "replicaof $MASTER 6379" >>$REDIS_CONFIG - fi - {{- end }} + configure_redis_ext() { + if [ -f /usr/local/etc/redis/redis.conf ]; then + log "Adding optional redis configuration" + cat /usr/local/etc/redis/redis.conf >>$REDIS_CONFIG + fi + if [ -d /extraredisconfigs ]; then + log "Adding extra redis configs to redis configuration" + cat /extraredisconfigs/* >>$REDIS_CONFIG + fi + } - if [ -f /usr/local/etc/redis/redis.conf ]; then - echo "Adding optional redis configuration settings" - cat /usr/local/etc/redis/redis.conf >>$REDIS_CONFIG - fi - if [ -d /extraredisconfigs ]; then - echo "Adding extra redis configs to redis configuration" - cat /extraredisconfigs/* >>$REDIS_CONFIG - fi - echo "Configuring redis server finished." + test_master_alive() { + TEST="$(timeout {{ .Values.haMode.masterAliveTestTimeout }}s redis-cli -h $MASTER -p 6379 ping)" + if [ -z "$TEST" ]; then + log "Master is not alive" + return 1 + fi + log "Master is alive" + return 0 } - {{- if .Values.haMode.enabled }} + + test_valid_dns() { + KNOWN_HOSTS=($(getent hosts {{ include "redis.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }} | awk '{ print $1 }')) + log "## Known hosts for headless service {{ include "redis.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}:" + for AHOSTIP in "${KNOWN_HOSTS[@]}"; do + AHOSTNAME=$(getent hosts $AHOSTIP | awk '{print $2}') + log "${AHOSTIP} ${AHOSTNAME}" + done + log "#####" + {{- if .Values.haMode.useDnsNames }} + MASTERENTRY="$(getent hosts $MASTER | awk '{ print $2 }')" + {{- else }} + MASTERENTRY="$(getent hosts $MASTER | awk '{ print $1 }')" + {{- end }} + } + configure_sentinel() { - echo "Redis HA-mode enabled" - echo "Configuring sentinel server..." 
- rm -f $SENTINEL_CONFIG + log "Configuring sentinel server..." + rm -f $SENTINEL_CONFIG + + log "Setting sentinel defaults" + if [ -z "$MASTER" ]; then + {{- if .Values.haMode.useDnsNames }} + MASTER=${HOSTNAME}.{{ include "redis.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }} + {{- else }} + MASTER="$(getent hosts $HOSTNAME | awk '{ print $1 }')" + {{- end }} + log "No master found - Configuring sentinel for master $HOSTNAME with address $MASTER" + echo "sentinel monitor {{ include "redis.masterGroupName" . }} $MASTER 6379 {{ .Values.haMode.quorum }}" >>$SENTINEL_CONFIG + else + log "Redis master was found - Configuring sentinel for master address $MASTER" + echo "sentinel monitor {{ include "redis.masterGroupName" . }} $MASTER 6379 {{ .Values.haMode.quorum }}" >>$SENTINEL_CONFIG + fi + {{- if .Values.haMode.useDnsNames }} + echo "SENTINEL resolve-hostnames yes" >>$SENTINEL_CONFIG + echo "SENTINEL announce-hostnames yes" >>$SENTINEL_CONFIG + {{- end }} + echo "sentinel down-after-milliseconds {{ include "redis.masterGroupName" . }} {{ .Values.haMode.downAfterMilliseconds }}" >>$SENTINEL_CONFIG + echo "sentinel failover-timeout {{ include "redis.masterGroupName" . }} {{ .Values.haMode.failoverTimeout }}" >>$SENTINEL_CONFIG + } + + configure_sentinel_ext() { + if [ -f /usr/local/etc/redis/sentinel.conf ]; then + log "Adding optional sentinel configuration settings" + cat /usr/local/etc/redis/sentinel.conf >>$SENTINEL_CONFIG + fi + if [ -d /extrasentinelconfigs ]; then + log "Adding extra sentinel configs to sentinel configuration" + cat /extrasentinelconfigs/* >>$SENTINEL_CONFIG + fi + log "Configuring sentinel server finished." + } - echo "Setting sentinel defaults" - if [ -z "$MASTER" ]; then - MASTER_IP="$(getent hosts $HOSTNAME | awk '{ print $1 }')" - echo "No master found - Configuring sentinel for master $HOSTNAME with address $MASTER_IP" - echo "sentinel monitor {{ include "redis.masterGroupName" . 
}} $MASTER_IP 6379 {{ .Values.haMode.quorum }}" >>$SENTINEL_CONFIG + configure_redis_cluster() { + log "Try to detect cluster master.." + MASTER="$(timeout {{ .Values.haMode.masterAliveTestTimeout }}s redis-cli -h {{ template "redis.fullname" . }} -p {{ .Values.service.sentinelPort }} sentinel get-master-addr-by-name {{ include "redis.masterGroupName" . }} | head -n 1)" + if [ -z "$MASTER" ]; then + log "No master found - This instance ($HOSTNAME) will be master now" + else + log "Redis master was found with address $MASTER - Checking host dns entry" + test_valid_dns + if [ -z "$MASTERENTRY" ]; then + log "No valid DNS entry found!" + if [ -f "/data/failover_restart" ]; then + rm /data/failover_restart + log "Forcing failover now" + redis-cli -h {{ template "redis.fullname" . }} -p {{ .Values.service.sentinelPort }} sentinel failover {{ include "redis.masterGroupName" . }} + else + log "Waiting for failover before restart" + sleep {{ .Values.haMode.failoverWait }} + touch /data/failover_restart + fi + log "Restart" + exit 1 else - echo "Redis master was found - Configuring sentinel for master address $MASTER" - echo "sentinel monitor {{ include "redis.masterGroupName" . }} $MASTER 6379 {{ .Values.haMode.quorum }}" >>$SENTINEL_CONFIG - fi - echo "sentinel down-after-milliseconds {{ include "redis.masterGroupName" . }} {{ .Values.haMode.downAfterMilliseconds }}" >>$SENTINEL_CONFIG - echo "sentinel failover-timeout {{ include "redis.masterGroupName" . }} {{ .Values.haMode.failoverTimeout }}" >>$SENTINEL_CONFIG - - if [ -f /usr/local/etc/redis/sentinel.conf ]; then - echo "Adding optional sentinel configuration settings" - cat /usr/local/etc/redis/sentinel.conf >>$SENTINEL_CONFIG - fi - if [ -d /extrasentinelconfigs ]; then - echo "Adding extra sentinel configs to sentinel configuration" - cat /extrasentinelconfigs/* >>$SENTINEL_CONFIG + MASTER=$MASTERENTRY + log "$MASTER has valid DNS entry" + log "Checking if master is alive" + test_master_alive + if [ "$?" 
-eq "1" ]; then + if [ -f "/data/failover_restart" ]; then + rm /data/failover_restart + log "Master is still dead! - forcing failover and retry pod initialization" + redis-cli -h {{ template "redis.fullname" . }} -p {{ .Values.service.sentinelPort }} sentinel failover {{ include "redis.masterGroupName" . }} + else + log "Dead master at address $MASTER detected! - waiting for failover" + sleep {{ .Values.haMode.failoverWait }} + touch /data/failover_restart + fi + log "Restart" + exit 1 + else + rm -f /data/failover_restart + log "Setting this instance ($HOSTNAME) as replicaof $MASTER" + echo "replicaof $MASTER 6379" >>$REDIS_CONFIG + fi fi - echo "Configuring sentinel server finished." + fi } - {{- end }} - echo "Initialize configuration..." - mkdir -p /data/conf - configure_redis + log "Creating configuration..." + configure_redis_base {{- if .Values.haMode.enabled }} + log "Redis HA-mode is enabled" + configure_redis_cluster configure_sentinel + configure_sentinel_ext + {{- end }} + configure_redis_ext + log "Done." + {{- if not .Values.haMode.keepOldLogs }} + rm -f /data/init.log {{- end }} - echo "Finished." 
diff --git a/charts/redis/values.yaml b/charts/redis/values.yaml index abda3f0ead2caa42a9d92fec44efaf8c481c0021..cc3839addaf8ac9802add794bfe9b2a3c5f47dfd 100644 --- a/charts/redis/values.yaml +++ b/charts/redis/values.yaml @@ -151,6 +151,9 @@ extraInitContainers: [] ## Extra containers for usage as sidecars extraContainers: [] +## Default Kubernetes cluster domain +clusterDomain: cluster.local + ## Arguments for the container entrypoint process (Redis server) args: [] @@ -195,6 +198,8 @@ extraSentinelSecrets: [] haMode: ## Enable high availibility deployment mode enabled: false + ## Use DNS names instead of Pod IPs to build the cluster + useDnsNames: false ## Mandatory redis HA-master group name (default "redisha") masterGroupName: "redisha" ## Number of replicas (minimum should be 3) @@ -202,15 +207,17 @@ haMode: ## Quorum of sentinels that need to agree that a master node is not available quorum: 2 ## Number of milliseconds after the master should be declared as unavailable - downAfterMilliseconds: 5000 + downAfterMilliseconds: 30000 ## Timeout for a failover failoverTimeout: 180000 ## Number of parallel reconfigurations - parallelSyncs: 2 + parallelSyncs: 1 ## Timeout in seconds to detect if Redis master is alive masterAliveTestTimeout: 2 - ## Assumed wait time in seconds until failover should be finished - failoverWait: 5 + ## Assumed wait time in seconds until failover should be finished and before failover will be forced (should be greater than value of downAfterMilliseconds) + failoverWait: 35 + ## Keep old init logs in /data/init.log after a successful initialization (use only for debugging) + keepOldLogs: false ## Storage parameters storage: