Unverified commit 23011bdf authored by Oleksii Kliukin, committed by GitHub

Migrate only master pods. Migrate single masters. (#199)

Avoid migrating replica pods, since they will be handled by the
node draining anyway (the PDB specifies that only masters are to
be kept).

Allow migration of single-pod clusters.
parent bb5ce6cb
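The reasoning above leans on the cluster's PodDisruptionBudget: because the PDB selects only the master pod, draining a node can evict replicas freely, while the master stays protected until the operator migrates it. As a rough, hypothetical sketch of such a PDB built with client-go types (the `spilo-role`/`cluster-name` label keys and `minAvailable: 1` are assumptions about the operator's defaults, not part of this commit):

package sketch

import (
	policyv1beta1 "k8s.io/api/policy/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

// buildMasterPDB sketches a PodDisruptionBudget that keeps at least one pod
// labelled as the master of the given cluster, so a node drain cannot evict
// the master but is free to evict replicas. Label keys are assumed defaults.
func buildMasterPDB(clusterName string) *policyv1beta1.PodDisruptionBudget {
	minAvailable := intstr.FromInt(1)
	return &policyv1beta1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{Name: clusterName + "-pdb"},
		Spec: policyv1beta1.PodDisruptionBudgetSpec{
			MinAvailable: &minAvailable,
			Selector: &metav1.LabelSelector{
				MatchLabels: map[string]string{
					"cluster-name": clusterName, // assumed cluster label key
					"spilo-role":   "master",    // assumed role label key
				},
			},
		},
	}
}

With a budget like that in place, draining a node takes care of replicas on its own; the controller changes below therefore only track and migrate master pods.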
@@ -34,10 +34,6 @@ func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) {
		return nil, fmt.Errorf("could not get list of pods: %v", err)
	}
	if len(pods.Items) == 0 {
		return nil, fmt.Errorf("no pods")
	}
	if role == Master && len(pods.Items) > 1 {
		return nil, fmt.Errorf("too many masters")
	}
@@ -158,6 +154,11 @@ func (c *Cluster) masterCandidate(oldNodeName string) (*v1.Pod, error) {
		return nil, fmt.Errorf("could not get replica pods: %v", err)
	}
	if len(replicas) == 0 {
		c.logger.Warningf("single master pod for cluster %q, migration will cause disruption of the service", c.clusterName())
		return nil, nil
	}
	for i, pod := range replicas {
		// look for replicas running on live nodes. Ignore errors when querying the nodes.
		if pod.Spec.NodeName != oldNodeName {
@@ -198,6 +199,11 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
		return fmt.Errorf("could not get new master candidate: %v", err)
	}
	// there are two cases for each postgres cluster that has its master pod on the node to migrate from:
	// - the cluster has some replicas - migrate one of those if necessary and failover to it
	// - there are no replicas - just terminate the master and wait until it respawns
	// in both cases the result is the new master up and running on a new node.
	if masterCandidatePod != nil {
		pod, err := c.movePodFromEndOfLifeNode(masterCandidatePod)
		if err != nil {
			return fmt.Errorf("could not move pod: %v", err)
@@ -207,12 +213,11 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
		if err := c.ManualFailover(oldMaster, masterCandidateName); err != nil {
			return fmt.Errorf("could not failover to pod %q: %v", masterCandidateName, err)
		}
	_, err = c.movePodFromEndOfLifeNode(oldMaster)
	if err != nil {
	} else {
		if _, err = c.movePodFromEndOfLifeNode(oldMaster); err != nil {
			return fmt.Errorf("could not move pod: %v", err)
		}
	}
	return nil
}
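The comment added to MigrateMasterPod above names two cases; the second one ("there are no replicas") amounts to deleting the master pod and waiting until its StatefulSet respawns it on another node. movePodFromEndOfLifeNode itself is not part of this diff, so the following is only a rough, hypothetical sketch of that pattern against a plain client-go clientset, not the operator's actual helper:

// Hypothetical sketch only: movePodFromEndOfLifeNode is not shown in this
// diff. This illustrates the "no replicas" branch described above -- delete
// the master pod and wait for its StatefulSet to respawn it on another node.
// Assumes a recent client-go clientset; names here are illustrative.
package sketch

import (
	"context"
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

func recreatePodOffNode(client kubernetes.Interface, namespace, name, oldNode string) error {
	// Deleting the pod lets its StatefulSet recreate it; the scheduler should
	// place the new pod on a schedulable node, i.e. not the drained one.
	if err := client.CoreV1().Pods(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{}); err != nil {
		return fmt.Errorf("could not delete pod: %v", err)
	}
	// Poll until a pod with the same name is scheduled on a different node.
	return wait.PollImmediate(2*time.Second, 5*time.Minute, func() (bool, error) {
		pod, err := client.CoreV1().Pods(namespace).Get(context.TODO(), name, metav1.GetOptions{})
		if err != nil {
			return false, nil // not found yet (or transient error): keep waiting
		}
		return pod.Spec.NodeName != "" && pod.Spec.NodeName != oldNode, nil
	})
}

In the replica case the same move is applied to the chosen replica first and is followed by ManualFailover, so clients see a controlled switchover rather than a full restart of the master.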
@@ -95,6 +95,9 @@ func (c *Cluster) preScaleDown(newStatefulSet *v1beta1.StatefulSet) error {
	if err != nil {
		return fmt.Errorf("could not get master pod: %v", err)
	}
	if len(masterPod) == 0 {
		return fmt.Errorf("no master pod is running in the cluster")
	}
	podNum, err := getPodIndex(masterPod[0].Name)
	if err != nil {
@@ -40,7 +40,7 @@ func (c *Controller) nodeAdd(obj interface{}) {
	c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID)
	// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
	if !c.nodeIsReady(node) {
		c.movePodsOffNode(node)
		c.moveMasterPodsOffNode(node)
	}
}
@@ -64,7 +64,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
	if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) {
		return
	}
	c.movePodsOffNode(nodeCur)
	c.moveMasterPodsOffNode(nodeCur)
}
func (c *Controller) nodeIsReady(node *v1.Node) bool {
@@ -72,7 +72,7 @@ func (c *Controller) nodeIsReady(node *v1.Node) bool {
		util.MapContains(node.Labels, map[string]string{"master": "true"}))
}

func (c *Controller) movePodsOffNode(node *v1.Node) {
func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
	nodeName := util.NameFromMeta(node.ObjectMeta)
	c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q",
		nodeName, c.opConfig.NodeReadinessLabel)
@@ -95,14 +95,15 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
	clusters := make(map[*cluster.Cluster]bool)
	masterPods := make(map[*v1.Pod]*cluster.Cluster)
	replicaPods := make(map[*v1.Pod]*cluster.Cluster)
	movedPods := 0
	for _, pod := range nodePods {
		podName := util.NameFromMeta(pod.ObjectMeta)
		role, ok := pod.Labels[c.opConfig.PodRoleLabel]
		if !ok || cluster.PostgresRole(role) != cluster.Master {
			if !ok {
				c.logger.Warningf("could not move pod %q: pod has no role", podName)
			}
			continue
		}
@@ -116,17 +117,11 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
			continue
		}
		movedPods++
		if !clusters[cl] {
			clusters[cl] = true
		}
		if cluster.PostgresRole(role) == cluster.Master {
			masterPods[pod] = cl
		} else {
			replicaPods[pod] = cl
		}
	}

	for cl := range clusters {
@@ -138,16 +133,8 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
		if err := cl.MigrateMasterPod(podName); err != nil {
			c.logger.Errorf("could not move master pod %q: %v", podName, err)
			movedPods--
		}
	}

	for pod, cl := range replicaPods {
		podName := util.NameFromMeta(pod.ObjectMeta)
		if err := cl.MigrateReplicaPod(podName, node.Name); err != nil {
			c.logger.Errorf("could not move replica pod %q: %v", podName, err)
			movedPods--
		} else {
			movedPods++
		}
	}
@@ -155,13 +142,13 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
		cl.Unlock()
	}
	totalPods := len(nodePods)
	totalPods := len(masterPods)
	c.logger.Infof("%d/%d pods have been moved out from the %q node",
	c.logger.Infof("%d/%d master pods have been moved out from the %q node",
		movedPods, totalPods, nodeName)
	if leftPods := totalPods - movedPods; leftPods > 0 {
		c.logger.Warnf("could not move %d/%d pods from the %q node",
		c.logger.Warnf("could not move %d/%d master pods from the %q node",
			leftPods, totalPods, nodeName)
	}
}