Tweak our reaction to the cluster upgrade process.

Previously, the operator started to move the pods off the nodes to be decommissioned by watching the eol_node_label value. Every new postgres pod was created with an anti-affinity to that label, making sure that the pods being moved would not land on another node that was about to be decommissioned. The changes introduce another label that indicates a ready node. The new node affinity will ensure that a pod is only scheduled to a node marked as ready, replacing the previous anti-affinity. That way the nodes can transition from pending-decommission to the other statuses (drained, terminating) without having pods suddenly scheduled onto them. In addition, rename the label that triggers the start of the upgrade process to node_eol_label (for consistency with node_readiness_label) and set its default value to lifecycle-status:pending-decommission.

Tweak our reaction to the cluster upgrade process.
f9e14d25 · Oleksii Kliukin · a39e89d1 · f9e14d25 · f9e14d25 · f9e14d25
Commit f9e14d25 authored 7 years ago by Oleksii Kliukin
--- a/manifests/configmap.yaml
+++ b/manifests/configmap.yaml
@@ -38,5 +38,6 @@ data:
  cluster_history_entries: "1000"
  pod_terminate_grace_period: 5m
  pdb_name_format: "postgres-{cluster}-pdb"
-  eol_node_label: "eol:true"
+  node_eol_label: "lifecycle-status:pending-decommission"
+  node_readiness_label: "lifecycle-status:ready"
  team_api_role_configuration: "log_statement:all"
--- a/pkg/cluster/k8sres.go
+++ b/pkg/cluster/k8sres.go
@@ -238,10 +238,10 @@ PatroniInitDBParams:

 func (c *Cluster) nodeAffinity() *v1.Affinity {
 	matchExpressions := make([]v1.NodeSelectorRequirement, 0)
-	for k, v := range c.OpConfig.EOLNodeLabel {
+	for k, v := range c.OpConfig.NodeReadinessLabel {
 		matchExpressions = append(matchExpressions, v1.NodeSelectorRequirement{
 			Key:      k,
-			Operator: v1.NodeSelectorOpNotIn,
+			Operator: v1.NodeSelectorOpIn,
 			Values:   []string{v},
 		})
 	}

--- a/pkg/cluster/pod.go
+++ b/pkg/cluster/pod.go
@@ -319,6 +319,6 @@ func (c *Cluster) podIsEndOfLife(pod *v1.Pod) (bool, error) {
 	if err != nil {
 		return false, err
 	}
-	return node.Spec.Unschedulable || util.MapContains(node.Labels, c.OpConfig.EOLNodeLabel), nil
+	return node.Spec.Unschedulable || util.MapContains(node.Labels, c.OpConfig.NodeEOLLabel), nil

 }
--- a/pkg/controller/node.go
+++ b/pkg/controller/node.go
@@ -55,13 +55,13 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
 		return
 	}

-	if nodePrev.Spec.Unschedulable && util.MapContains(nodePrev.Labels, c.opConfig.EOLNodeLabel) ||
-		!nodeCur.Spec.Unschedulable || !util.MapContains(nodeCur.Labels, c.opConfig.EOLNodeLabel) {
+	if nodePrev.Spec.Unschedulable && util.MapContains(nodePrev.Labels, c.opConfig.NodeEOLLabel) ||
+		!nodeCur.Spec.Unschedulable || !util.MapContains(nodeCur.Labels, c.opConfig.NodeEOLLabel) {
 		return
 	}

 	c.logger.Infof("node %q became unschedulable and has EOL labels: %q", util.NameFromMeta(nodeCur.ObjectMeta),
-		c.opConfig.EOLNodeLabel)
+		c.opConfig.NodeEOLLabel)

 	opts := metav1.ListOptions{
 		LabelSelector: labels.Set(c.opConfig.ClusterLabels).String(),

--- a/pkg/util/config/config.go
+++ b/pkg/util/config/config.go
@@ -30,7 +30,8 @@ type Resources struct {
 	DefaultMemoryRequest    string            `name:"default_memory_request" default:"100Mi"`
 	DefaultCPULimit         string            `name:"default_cpu_limit" default:"3"`
 	DefaultMemoryLimit      string            `name:"default_memory_limit" default:"1Gi"`
-	EOLNodeLabel            map[string]string `name:"eol_node_label" default:"eol:true"`
+	NodeEOLLabel            map[string]string `name:"node_eol_label" default:"lifecycle-status:pending-decommission"`
+	NodeReadinessLabel      map[string]string `name:"node_readiness_label" default:"lifecycle-status:ready"`
 }

 // Auth describes authentication specific configuration parameters