From e99484b6aed74b94e932e80ce8b819a05a868bf3 Mon Sep 17 00:00:00 2001
From: Felix Kunde <felix-kunde@gmx.de>
Date: Tue, 28 Jun 2022 19:51:59 +0200
Subject: [PATCH] add annotation to bypass globally configured instance limits
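
This introduces a new operator option, ignore_instance_limits_annotation_key.
When it is set, an individual cluster can bypass the globally configured
min_instances and max_instances limits by carrying the configured annotation
with the value "true" in its manifest.

A minimal sketch of the intended usage; the key name "ignore-instance-limits"
is only an example, any valid annotation key can be configured:

    # operator ConfigMap (or the OperatorConfiguration resource)
    ignore_instance_limits_annotation_key: "ignore-instance-limits"

    # cluster manifest opting out of the global limits
    apiVersion: "acid.zalan.do/v1"
    kind: postgresql
    metadata:
      name: acid-minimal-cluster
      annotations:
        ignore-instance-limits: "true"
    spec:
      teamId: "acid"
      numberOfInstances: 20  # kept even if max_instances is lower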

---
 .../crds/operatorconfigurations.yaml          |   2 +
 charts/postgres-operator/values.yaml          |   9 +-
 docs/reference/operator_parameters.md         |   6 +
 manifests/configmap.yaml                      |   1 +
 manifests/operatorconfiguration.crd.yaml      |   2 +
 ...gresql-operator-default-configuration.yaml |   1 +
 pkg/apis/acid.zalan.do/v1/crds.go             |   3 +
 .../v1/operator_configuration_type.go         |   6 +-
 pkg/cluster/k8sres.go                         |   8 ++
 pkg/cluster/k8sres_test.go                    | 130 ++++++++++++++++++
 pkg/controller/operator_config.go             |   1 +
 pkg/util/config/config.go                     |   6 +-
 12 files changed, 169 insertions(+), 6 deletions(-)

diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml
index c5b9a4c1..6b7c4630 100644
--- a/charts/postgres-operator/crds/operatorconfigurations.yaml
+++ b/charts/postgres-operator/crds/operatorconfigurations.yaml
@@ -91,6 +91,8 @@ spec:
               etcd_host:
                 type: string
                 default: ""
+              ignore_instance_limits_annotation_key:
+                type: string
               kubernetes_use_configmaps:
                 type: boolean
                 default: false
diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml
index 2650824b..86cbb298 100644
--- a/charts/postgres-operator/values.yaml
+++ b/charts/postgres-operator/values.yaml
@@ -35,10 +35,15 @@ configGeneral:
   enable_spilo_wal_path_compat: false
   # etcd connection string for Patroni. Empty uses K8s-native DCS.
   etcd_host: ""
-  # Select if setup uses endpoints (default), or configmaps to manage leader (DCS=k8s)
-  # kubernetes_use_configmaps: false
   # Spilo docker image
   docker_image: registry.opensource.zalan.do/acid/spilo-14:2.1-p6
+
+  # annotation key to let a cluster bypass globally configured instance limits
+  # ignore_instance_limits_annotation_key: ""
+
+  # Select if setup uses endpoints (default), or configmaps to manage leader (DCS=k8s)
+  # kubernetes_use_configmaps: false
+
   # min number of instances in Postgres cluster. -1 = no limit
   min_instances: -1
   # max number of instances in Postgres cluster. -1 = no limit
diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md
index b31b753f..8aa76aa7 100644
--- a/docs/reference/operator_parameters.md
+++ b/docs/reference/operator_parameters.md
@@ -147,6 +147,12 @@ Those are top-level keys, containing both leaf keys and groups.
   When `-1` is specified for `min_instances`, no limits are applied. The default
   is `-1`.
 
+* **ignore_instance_limits_annotation_key**
+  some clusters may need to scale beyond the limits configured with the
+  `min_instances` and `max_instances` options. Define an annotation key here;
+  setting this annotation to `"true"` in a cluster manifest makes the operator
+  ignore the globally configured instance limits. The default is empty.
+
 * **resync_period**
   period between consecutive sync requests. The default is `30m`.
 
diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml
index e43f9205..93e27804 100644
--- a/manifests/configmap.yaml
+++ b/manifests/configmap.yaml
@@ -66,6 +66,7 @@ data:
+  # ignore_instance_limits_annotation_key: ""
   # ignored_annotations: ""
   # infrastructure_roles_secret_name: "postgresql-infrastructure-roles"
   # infrastructure_roles_secrets: "secretname:monitoring-roles,userkey:user,passwordkey:password,rolekey:inrole"
   # inherited_annotations: owned-by
   # inherited_labels: application,environment
   # kube_iam_role: ""
diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml
index 091aa4eb..aae9e502 100644
--- a/manifests/operatorconfiguration.crd.yaml
+++ b/manifests/operatorconfiguration.crd.yaml
@@ -89,6 +89,8 @@ spec:
               etcd_host:
                 type: string
                 default: ""
+              ignore_instance_limits_annotation_key:
+                type: string
               kubernetes_use_configmaps:
                 type: boolean
                 default: false
diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml
index 9ea313cf..b18855fc 100644
--- a/manifests/postgresql-operator-default-configuration.yaml
+++ b/manifests/postgresql-operator-default-configuration.yaml
@@ -12,6 +12,7 @@ configuration:
   # enable_shm_volume: true
   enable_spilo_wal_path_compat: false
   etcd_host: ""
+  # ignore_instance_limits_annotation_key: ""
   # kubernetes_use_configmaps: false
   max_instances: -1
   min_instances: -1
diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go
index 5a252500..e672e473 100644
--- a/pkg/apis/acid.zalan.do/v1/crds.go
+++ b/pkg/apis/acid.zalan.do/v1/crds.go
@@ -1115,6 +1115,9 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
 					"etcd_host": {
 						Type: "string",
 					},
+					"ignore_instance_limits_annotation_key": {
+						Type: "string",
+					},
 					"kubernetes_use_configmaps": {
 						Type: "boolean",
 					},
diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
index 0be27555..2211b9ed 100644
--- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
+++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
@@ -236,8 +236,6 @@ type OperatorConfigurationData struct {
 	KubernetesUseConfigMaps    bool                               `json:"kubernetes_use_configmaps,omitempty"`
 	DockerImage                string                             `json:"docker_image,omitempty"`
 	Workers                    uint32                             `json:"workers,omitempty"`
-	MinInstances               int32                              `json:"min_instances,omitempty"`
-	MaxInstances               int32                              `json:"max_instances,omitempty"`
 	ResyncPeriod               Duration                           `json:"resync_period,omitempty"`
 	RepairPeriod               Duration                           `json:"repair_period,omitempty"`
 	SetMemoryRequestToLimit    bool                               `json:"set_memory_request_to_limit,omitempty"`
@@ -257,6 +255,10 @@ type OperatorConfigurationData struct {
 	Scalyr                     ScalyrConfiguration                `json:"scalyr"`
 	LogicalBackup              OperatorLogicalBackupConfiguration `json:"logical_backup"`
 	ConnectionPooler           ConnectionPoolerConfiguration      `json:"connection_pooler"`
+
+	MinInstances                      int32  `json:"min_instances,omitempty"`
+	MaxInstances                      int32  `json:"max_instances,omitempty"`
+	IgnoreInstanceLimitsAnnotationKey string `json:"ignore_instance_limits_annotation_key,omitempty"`
 }
 
 //Duration shortens this frequently used name
diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go
index cf84f420..e84303b1 100644
--- a/pkg/cluster/k8sres.go
+++ b/pkg/cluster/k8sres.go
@@ -1343,6 +1343,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
 		return nil, fmt.Errorf("could not generate volume claim template: %v", err)
 	}
 
+	// global min_instances and max_instances settings may override the manifest value
 	numberOfInstances := c.getNumberOfInstances(spec)
 
 	// the operator has domain-specific logic on how to do rolling updates of PG clusters
@@ -1443,9 +1444,16 @@ func (c *Cluster) generateScalyrSidecarSpec(clusterName, APIKey, serverURL, dock
 func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 {
 	min := c.OpConfig.MinInstances
 	max := c.OpConfig.MaxInstances
+	instanceLimitAnnotationKey := c.OpConfig.IgnoreInstanceLimitsAnnotationKey
 	cur := spec.NumberOfInstances
 	newcur := cur
 
+	// the configured annotation set to "true" on the manifest bypasses the global limits
+	if instanceLimitAnnotationKey != "" {
+		if value, exists := c.ObjectMeta.Annotations[instanceLimitAnnotationKey]; exists && value == "true" {
+			return cur
+		}
+	}
 	if spec.StandbyCluster != nil {
 		if newcur == 1 {
 			min = newcur
diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go
index 7f96a223..36f23999 100644
--- a/pkg/cluster/k8sres_test.go
+++ b/pkg/cluster/k8sres_test.go
@@ -864,6 +864,136 @@ func TestGenerateSpiloPodEnvVars(t *testing.T) {
 	}
 }
 
+func TestGetNumberOfInstances(t *testing.T) {
+	testName := "TestGetNumberOfInstances"
+	tests := []struct {
+		subTest         string
+		config          config.Config
+		annotationKey   string
+		annotationValue string
+		desired         int32
+		expected        int32
+	}{
+		{
+			subTest: "no constraints",
+			config: config.Config{
+				Resources: config.Resources{
+					MinInstances:                      -1,
+					MaxInstances:                      -1,
+					IgnoreInstanceLimitsAnnotationKey: "",
+				},
+			},
+			annotationKey:   "",
+			annotationValue: "",
+			desired:         2,
+			expected:        2,
+		},
+		{
+			subTest: "minInstances defined",
+			config: config.Config{
+				Resources: config.Resources{
+					MinInstances:                      2,
+					MaxInstances:                      -1,
+					IgnoreInstanceLimitsAnnotationKey: "",
+				},
+			},
+			annotationKey:   "",
+			annotationValue: "",
+			desired:         1,
+			expected:        2,
+		},
+		{
+			subTest: "maxInstances defined",
+			config: config.Config{
+				Resources: config.Resources{
+					MinInstances:                      -1,
+					MaxInstances:                      5,
+					IgnoreInstanceLimitsAnnotationKey: "",
+				},
+			},
+			annotationKey:   "",
+			annotationValue: "",
+			desired:         10,
+			expected:        5,
+		},
+		{
+			subTest: "ignore minInstances",
+			config: config.Config{
+				Resources: config.Resources{
+					MinInstances:                      2,
+					MaxInstances:                      -1,
+					IgnoreInstanceLimitsAnnotationKey: "ignore-instance-limits",
+				},
+			},
+			annotationKey:   "ignore-instance-limits",
+			annotationValue: "true",
+			desired:         1,
+			expected:        1,
+		},
+		{
+			subTest: "want to ignore minInstances but wrong key",
+			config: config.Config{
+				Resources: config.Resources{
+					MinInstances:                      2,
+					MaxInstances:                      -1,
+					IgnoreInstanceLimitsAnnotationKey: "ignore-instance-limits",
+				},
+			},
+			annotationKey:   "ignoring-instance-limits",
+			annotationValue: "true",
+			desired:         1,
+			expected:        2,
+		},
+		{
+			subTest: "want to ignore minInstances but wrong value",
+			config: config.Config{
+				Resources: config.Resources{
+					MinInstances:                      2,
+					MaxInstances:                      -1,
+					IgnoreInstanceLimitsAnnotationKey: "ignore-instance-limits",
+				},
+			},
+			annotationKey:   "ignore-instance-limits",
+			annotationValue: "active",
+			desired:         1,
+			expected:        2,
+		},
+		{
+			subTest: "annotation set but no constraints to ignore",
+			config: config.Config{
+				Resources: config.Resources{
+					MinInstances:                      -1,
+					MaxInstances:                      -1,
+					IgnoreInstanceLimitsAnnotationKey: "ignore-instance-limits",
+				},
+			},
+			annotationKey:   "ignore-instance-limits",
+			annotationValue: "true",
+			desired:         1,
+			expected:        1,
+		},
+	}
+
+	for _, tt := range tests {
+		var cluster = New(
+			Config{
+				OpConfig: tt.config,
+			}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
+
+		cluster.Spec.NumberOfInstances = tt.desired
+		if tt.annotationKey != "" {
+			cluster.ObjectMeta.Annotations = make(map[string]string)
+			cluster.ObjectMeta.Annotations[tt.annotationKey] = tt.annotationValue
+		}
+		numInstances := cluster.getNumberOfInstances(&cluster.Spec)
+
+		if numInstances != tt.expected {
+			t.Errorf("%s %s: expected %d instances, got %d",
+				testName, tt.subTest, tt.expected, numInstances)
+		}
+	}
+}
+
 func TestCloneEnv(t *testing.T) {
 	testName := "TestCloneEnv"
 	tests := []struct {
diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go
index 09b2b9e2..9e1d4064 100644
--- a/pkg/controller/operator_config.go
+++ b/pkg/controller/operator_config.go
@@ -42,6 +42,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
 	result.Workers = util.CoalesceUInt32(fromCRD.Workers, 8)
 	result.MinInstances = fromCRD.MinInstances
 	result.MaxInstances = fromCRD.MaxInstances
+	result.IgnoreInstanceLimitsAnnotationKey = fromCRD.IgnoreInstanceLimitsAnnotationKey
 	result.ResyncPeriod = util.CoalesceDuration(time.Duration(fromCRD.ResyncPeriod), "30m")
 	result.RepairPeriod = util.CoalesceDuration(time.Duration(fromCRD.RepairPeriod), "5m")
 	result.SetMemoryRequestToLimit = fromCRD.SetMemoryRequestToLimit
diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go
index 3e0f4164..21948cc0 100644
--- a/pkg/util/config/config.go
+++ b/pkg/util/config/config.go
@@ -58,9 +58,11 @@ type Resources struct {
 	PodEnvironmentSecret          string              `name:"pod_environment_secret"`
 	NodeReadinessLabel            map[string]string   `name:"node_readiness_label" default:""`
 	NodeReadinessLabelMerge       string              `name:"node_readiness_label_merge" default:"OR"`
-	MaxInstances                  int32               `name:"max_instances" default:"-1"`
-	MinInstances                  int32               `name:"min_instances" default:"-1"`
 	ShmVolume                     *bool               `name:"enable_shm_volume" default:"true"`
+
+	MaxInstances                      int32  `name:"max_instances" default:"-1"`
+	MinInstances                      int32  `name:"min_instances" default:"-1"`
+	IgnoreInstanceLimitsAnnotationKey string `name:"ignore_instance_limits_annotation_key"`
 }
 
 type InfrastructureRole struct {
-- 
GitLab