From 23011bdf9add464b72cec02d6d0001262193db51 Mon Sep 17 00:00:00 2001
From: Oleksii Kliukin <oleksii.kliukin@zalando.de>
Date: Tue, 9 Jan 2018 11:55:11 +0100
Subject: [PATCH] Migrate only master pods. Migrate single masters. (#199)

Avoid migrating replica pods, since they will be handled by node
draining anyway (the PDB specifies that only master pods are to
be kept).

Allow migration of single-pod clusters.
---
 pkg/cluster/pod.go       | 39 ++++++++++++++++++++++-----------------
 pkg/cluster/resources.go |  3 +++
 pkg/controller/node.go   | 39 +++++++++++++--------------------------
 3 files changed, 38 insertions(+), 43 deletions(-)

diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go
index e64421a3..79f33f18 100644
--- a/pkg/cluster/pod.go
+++ b/pkg/cluster/pod.go
@@ -34,10 +34,6 @@ func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) {
 		return nil, fmt.Errorf("could not get list of pods: %v", err)
 	}
 
-	if len(pods.Items) == 0 {
-		return nil, fmt.Errorf("no pods")
-	}
-
 	if role == Master && len(pods.Items) > 1 {
 		return nil, fmt.Errorf("too many masters")
 	}
@@ -158,6 +154,11 @@ func (c *Cluster) masterCandidate(oldNodeName string) (*v1.Pod, error) {
 		return nil, fmt.Errorf("could not get replica pods: %v", err)
 	}
 
+	if len(replicas) == 0 {
+		c.logger.Warningf("single master pod for cluster %q, migration will cause disruption of the service")
+		return nil, nil
+	}
+
 	for i, pod := range replicas {
 		// look for replicas running on live nodes. Ignore errors when querying the nodes.
 		if pod.Spec.NodeName != oldNodeName {
@@ -198,21 +199,25 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
 		return fmt.Errorf("could not get new master candidate: %v", err)
 	}
 
-	pod, err := c.movePodFromEndOfLifeNode(masterCandidatePod)
-	if err != nil {
-		return fmt.Errorf("could not move pod: %v", err)
-	}
-
-	masterCandidateName := util.NameFromMeta(pod.ObjectMeta)
-	if err := c.ManualFailover(oldMaster, masterCandidateName); err != nil {
-		return fmt.Errorf("could not failover to pod %q: %v", masterCandidateName, err)
-	}
+	// there are two cases for each postgres cluster that has its master pod on the node to migrate from:
+	// - the cluster has some replicas - migrate one of those if necessary and failover to it
+	// - there are no replicas - just terminate the master and wait until it respawns
+	// in both cases the result is the new master up and running on a new node.
+	if masterCandidatePod != nil {
+		pod, err := c.movePodFromEndOfLifeNode(masterCandidatePod)
+		if err != nil {
+			return fmt.Errorf("could not move pod: %v", err)
+		}
 
-	_, err = c.movePodFromEndOfLifeNode(oldMaster)
-	if err != nil {
-		return fmt.Errorf("could not move pod: %v", err)
+		masterCandidateName := util.NameFromMeta(pod.ObjectMeta)
+		if err := c.ManualFailover(oldMaster, masterCandidateName); err != nil {
+			return fmt.Errorf("could not failover to pod %q: %v", masterCandidateName, err)
+		}
+	} else {
+		if _, err = c.movePodFromEndOfLifeNode(oldMaster); err != nil {
+			return fmt.Errorf("could not move pod: %v", err)
+		}
 	}
-
 	return nil
 }
 
diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go
index 1928143b..37b342b4 100644
--- a/pkg/cluster/resources.go
+++ b/pkg/cluster/resources.go
@@ -95,6 +95,9 @@ func (c *Cluster) preScaleDown(newStatefulSet *v1beta1.StatefulSet) error {
 	if err != nil {
 		return fmt.Errorf("could not get master pod: %v", err)
 	}
+	if len(masterPod) == 0 {
+		return fmt.Errorf("no master pod is running in the cluster")
+	}
 
 	podNum, err := getPodIndex(masterPod[0].Name)
 	if err != nil {
diff --git a/pkg/controller/node.go b/pkg/controller/node.go
index f745333a..98e8f288 100644
--- a/pkg/controller/node.go
+++ b/pkg/controller/node.go
@@ -40,7 +40,7 @@ func (c *Controller) nodeAdd(obj interface{}) {
 	c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID)
 	// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
 	if !c.nodeIsReady(node) {
-		c.movePodsOffNode(node)
+		c.moveMasterPodsOffNode(node)
 	}
 }
 
@@ -64,7 +64,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
 	if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) {
 		return
 	}
-	c.movePodsOffNode(nodeCur)
+	c.moveMasterPodsOffNode(nodeCur)
 }
 
 func (c *Controller) nodeIsReady(node *v1.Node) bool {
@@ -72,7 +72,7 @@ func (c *Controller) nodeIsReady(node *v1.Node) bool {
 		util.MapContains(node.Labels, map[string]string{"master": "true"}))
 }
 
-func (c *Controller) movePodsOffNode(node *v1.Node) {
+func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
 	nodeName := util.NameFromMeta(node.ObjectMeta)
 	c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q",
 		nodeName, c.opConfig.NodeReadinessLabel)
@@ -95,14 +95,15 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
 
 	clusters := make(map[*cluster.Cluster]bool)
 	masterPods := make(map[*v1.Pod]*cluster.Cluster)
-	replicaPods := make(map[*v1.Pod]*cluster.Cluster)
 	movedPods := 0
 	for _, pod := range nodePods {
 		podName := util.NameFromMeta(pod.ObjectMeta)
 
 		role, ok := pod.Labels[c.opConfig.PodRoleLabel]
-		if !ok {
-			c.logger.Warningf("could not move pod %q: pod has no role", podName)
+		if !ok || cluster.PostgresRole(role) != cluster.Master {
+			if !ok {
+				c.logger.Warningf("could not move pod %q: pod has no role", podName)
+			}
 			continue
 		}
 
@@ -116,17 +117,11 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
 			continue
 		}
 
-		movedPods++
-
 		if !clusters[cl] {
 			clusters[cl] = true
 		}
 
-		if cluster.PostgresRole(role) == cluster.Master {
-			masterPods[pod] = cl
-		} else {
-			replicaPods[pod] = cl
-		}
+		masterPods[pod] = cl
 	}
 
 	for cl := range clusters {
@@ -138,16 +133,8 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
 
 		if err := cl.MigrateMasterPod(podName); err != nil {
 			c.logger.Errorf("could not move master pod %q: %v", podName, err)
-			movedPods--
-		}
-	}
-
-	for pod, cl := range replicaPods {
-		podName := util.NameFromMeta(pod.ObjectMeta)
-
-		if err := cl.MigrateReplicaPod(podName, node.Name); err != nil {
-			c.logger.Errorf("could not move replica pod %q: %v", podName, err)
-			movedPods--
+		} else {
+			movedPods++
 		}
 	}
 
@@ -155,13 +142,13 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
 		cl.Unlock()
 	}
 
-	totalPods := len(nodePods)
+	totalPods := len(masterPods)
 
-	c.logger.Infof("%d/%d pods have been moved out from the %q node",
+	c.logger.Infof("%d/%d master pods have been moved out from the %q node",
 		movedPods, totalPods, nodeName)
 
 	if leftPods := totalPods - movedPods; leftPods > 0 {
-		c.logger.Warnf("could not move %d/%d pods from the %q node",
+		c.logger.Warnf("could not move master %d/%d pods from the %q node",
 			leftPods, totalPods, nodeName)
 	}
 }
-- 
GitLab