Skip to content
Snippets Groups Projects
Commit 26a7fdfa authored by teuto.net Netzdienste GmbH's avatar teuto.net Netzdienste GmbH Committed by Sergey Dudoladov
Browse files

Add Pod Anti Affinity (#489)


* Add Pod Anti Affinity
parent 2e9b6533
Branches
Tags
No related merge requests found
......@@ -151,6 +151,36 @@ Postgres pods by default receive tolerations for `unreachable` and `noExecute` t
Depending on your setup, you may want to adjust these parameters to prevent master pods from being evicted by the Kubernetes runtime.
To prevent eviction completely, specify the toleration by leaving out the `tolerationSeconds` value (similar to how Kubernetes' own DaemonSets are configured)
### Enable pod anti affinity
To ensure Postgres pods are running on different topologies, you can use [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/)
and configure the required topology in the operator ConfigMap.
Enable pod anti affinity by adding following line to the operator ConfigMap:
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: postgres-operator
data:
enable_pod_antiaffinity: "true"
```
By default the topology key for the pod anti affinity is set to `kubernetes.io/hostname`,
you can set another topology key e.g. `failure-domain.beta.kubernetes.io/zone` by adding following line
to the operator ConfigMap, see [built-in node labels](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels) for available topology keys:
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: postgres-operator
data:
enable_pod_antiaffinity: "true"
pod_antiaffinity_topology_key: "failure-domain.beta.kubernetes.io/zone"
```
### Add cluster-specific labels
In some cases, you might want to add `labels` that are specific to a given
......
......@@ -213,6 +213,14 @@ configuration they are grouped under the `kubernetes` key.
that should be assigned to the Postgres pods. The priority class itself must be defined in advance.
Default is empty (use the default priority class).
* **enable_pod_antiaffinity**
toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) on the Postgres pods, to avoid multiple pods
of the same Postgres cluster in the same topology , e.g. node. The default is `false`.
* **pod_antiaffinity_topology_key**
override
[topology key](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels)
for pod anti affinity. The default is `kubernetes.io/hostname`.
## Kubernetes resource requests
......
......@@ -60,6 +60,8 @@ type KubernetesMetaConfiguration struct {
// TODO: use namespacedname
PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"`
PodPriorityClassName string `json:"pod_priority_class_name,omitempty"`
EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity" default:"false"`
PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
}
// PostgresPodResourcesDefaults defines the spec of default resources
......
......@@ -290,6 +290,26 @@ func nodeAffinity(nodeReadinessLabel map[string]string) *v1.Affinity {
}
}
func generatePodAffinity(labels labels.Set, topologyKey string, nodeAffinity *v1.Affinity) *v1.Affinity {
// generate pod anti-affinity to avoid multiple pods of the same Postgres cluster in the same topology , e.g. node
podAffinity := v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{
LabelSelector: &metav1.LabelSelector{
MatchLabels: labels,
},
TopologyKey: topologyKey,
}},
},
}
if nodeAffinity != nil && nodeAffinity.NodeAffinity != nil {
podAffinity.NodeAffinity = nodeAffinity.NodeAffinity
}
return &podAffinity
}
func tolerations(tolerationsSpec *[]v1.Toleration, podToleration map[string]string) []v1.Toleration {
// allow to override tolerations by postgresql manifest
if len(*tolerationsSpec) > 0 {
......@@ -419,6 +439,8 @@ func generatePodTemplate(
kubeIAMRole string,
priorityClassName string,
shmVolume bool,
podAntiAffinity bool,
podAntiAffinityTopologyKey string,
) (*v1.PodTemplateSpec, error) {
terminateGracePeriodSeconds := terminateGracePeriod
......@@ -437,7 +459,9 @@ func generatePodTemplate(
addShmVolume(&podSpec)
}
if nodeAffinity != nil {
if podAntiAffinity {
podSpec.Affinity = generatePodAffinity(labels, podAntiAffinityTopologyKey, nodeAffinity)
} else if nodeAffinity != nil {
podSpec.Affinity = nodeAffinity
}
......@@ -813,7 +837,9 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
c.OpConfig.PodServiceAccountName,
c.OpConfig.KubeIAMRole,
effectivePodPriorityClassName,
mountShmVolumeNeeded(c.OpConfig, spec)); err != nil {
mountShmVolumeNeeded(c.OpConfig, spec),
c.OpConfig.EnablePodAntiAffinity,
c.OpConfig.PodAntiAffinityTopologyKey); err != nil {
return nil, fmt.Errorf("could not generate pod template: %v", err)
}
......
......@@ -53,6 +53,9 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel
result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName
result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity;
result.PodAntiAffinityTopologyKey = fromCRD.Kubernetes.PodAntiAffinityTopologyKey;
result.DefaultCPURequest = fromCRD.PostgresPodResources.DefaultCPURequest
result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest
result.DefaultCPULimit = fromCRD.PostgresPodResources.DefaultCPULimit
......
......@@ -95,6 +95,8 @@ type Config struct {
EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"`
EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"`
CustomServiceAnnotations map[string]string `name:"custom_service_annotations"`
EnablePodAntiAffinity bool `name:"enable_pod_antiaffinity" default:"false"`
PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
// deprecated and kept for backward compatibility
EnableLoadBalancer *bool `name:"enable_load_balancer"`
MasterDNSNameFormat StringTemplate `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"`
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment