From f9e14d25170f26cf12367e29a2496c3767f5f839 Mon Sep 17 00:00:00 2001
From: Oleksii Kliukin <oleksii.kliukin@zalando.de>
Date: Thu, 30 Nov 2017 12:36:06 +0100
Subject: [PATCH] Tweak our reaction to the cluster upgrade process.

Previously, the operator started to move the pods off the nodes to be
decommissioned by watching the eol_node_label value. Every new postgres
pod was created with an anti-affinity to that label, making sure that
the pods being moved would not land on another node that is about to
be decommissioned.

The changes introduce another label that marks a node as ready.  The
new pod affinity will ensure that the pod is only scheduled to a node
marked as ready, discarding the previous anti-affinity.  That way the
nodes can transition from pending-decommission to the other statuses
(drained, terminating) without having pods suddenly scheduled onto
them.

In addition, rename the label that triggers the start of the upgrade
process to node_eol_label (for consistency with node_readiness_label)
and set its default value to lifecycle-status:pending-decommission.
---
 manifests/configmap.yaml  | 3 ++-
 pkg/cluster/k8sres.go     | 4 ++--
 pkg/cluster/pod.go        | 2 +-
 pkg/controller/node.go    | 6 +++---
 pkg/util/config/config.go | 3 ++-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml
index 073ab1f7..d996a473 100644
--- a/manifests/configmap.yaml
+++ b/manifests/configmap.yaml
@@ -38,5 +38,6 @@ data:
   cluster_history_entries: "1000"
   pod_terminate_grace_period: 5m
   pdb_name_format: "postgres-{cluster}-pdb"
-  eol_node_label: "eol:true"
+  node_eol_label: "lifecycle-status:pending-decommission"
+  node_readiness_label: "lifecycle-status:ready"
   team_api_role_configuration: "log_statement:all"
diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go
index 0f0e5cd0..2170bea1 100644
--- a/pkg/cluster/k8sres.go
+++ b/pkg/cluster/k8sres.go
@@ -238,10 +238,10 @@ PatroniInitDBParams:
 
 func (c *Cluster) nodeAffinity() *v1.Affinity {
 	matchExpressions := make([]v1.NodeSelectorRequirement, 0)
-	for k, v := range c.OpConfig.EOLNodeLabel {
+	for k, v := range c.OpConfig.NodeReadinessLabel {
 		matchExpressions = append(matchExpressions, v1.NodeSelectorRequirement{
 			Key:      k,
-			Operator: v1.NodeSelectorOpNotIn,
+			Operator: v1.NodeSelectorOpIn,
 			Values:   []string{v},
 		})
 	}
diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go
index 5f458db9..44af662c 100644
--- a/pkg/cluster/pod.go
+++ b/pkg/cluster/pod.go
@@ -319,6 +319,6 @@ func (c *Cluster) podIsEndOfLife(pod *v1.Pod) (bool, error) {
 	if err != nil {
 		return false, err
 	}
-	return node.Spec.Unschedulable || util.MapContains(node.Labels, c.OpConfig.EOLNodeLabel), nil
+	return node.Spec.Unschedulable || util.MapContains(node.Labels, c.OpConfig.NodeEOLLabel), nil
 
 }
diff --git a/pkg/controller/node.go b/pkg/controller/node.go
index abe74da0..524b3ce3 100644
--- a/pkg/controller/node.go
+++ b/pkg/controller/node.go
@@ -55,13 +55,13 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
 		return
 	}
 
-	if nodePrev.Spec.Unschedulable && util.MapContains(nodePrev.Labels, c.opConfig.EOLNodeLabel) ||
-		!nodeCur.Spec.Unschedulable || !util.MapContains(nodeCur.Labels, c.opConfig.EOLNodeLabel) {
+	if nodePrev.Spec.Unschedulable && util.MapContains(nodePrev.Labels, c.opConfig.NodeEOLLabel) ||
+		!nodeCur.Spec.Unschedulable || !util.MapContains(nodeCur.Labels, c.opConfig.NodeEOLLabel) {
 		return
 	}
 
 	c.logger.Infof("node %q became unschedulable and has EOL labels: %q", util.NameFromMeta(nodeCur.ObjectMeta),
-		c.opConfig.EOLNodeLabel)
+		c.opConfig.NodeEOLLabel)
 
 	opts := metav1.ListOptions{
 		LabelSelector: labels.Set(c.opConfig.ClusterLabels).String(),
diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go
index db98d5c2..c92b9d35 100644
--- a/pkg/util/config/config.go
+++ b/pkg/util/config/config.go
@@ -30,7 +30,8 @@ type Resources struct {
 	DefaultMemoryRequest    string            `name:"default_memory_request" default:"100Mi"`
 	DefaultCPULimit         string            `name:"default_cpu_limit" default:"3"`
 	DefaultMemoryLimit      string            `name:"default_memory_limit" default:"1Gi"`
-	EOLNodeLabel            map[string]string `name:"eol_node_label" default:"eol:true"`
+	NodeEOLLabel            map[string]string `name:"node_eol_label" default:"lifecycle-status:pending-decommission"`
+	NodeReadinessLabel      map[string]string `name:"node_readiness_label" default:"lifecycle-status:ready"`
 }
 
 // Auth describes authentication specific configuration parameters
-- 
GitLab