From 7e72431e87fd5d5f25eaaacb02e266c8ead9fa55 Mon Sep 17 00:00:00 2001
From: Lukas Metzner <lukas.metzner@hetzner-cloud.de>
Date: Tue, 19 Nov 2024 12:11:09 +0100
Subject: [PATCH] feat: Added new option enableProvidedByTopology (#780)

We are reintroducing a feature originally present in v2.10.0 that
prevents Pods from getting stuck in the `Pending` state in clusters
with non-cloud nodes. The feature is now optional and can be enabled
via the Helm Chart. By default it remains disabled to avoid
compatibility issues with Nomad clusters, which implement the CSI spec
differently.

Learn more about it in #400.
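
To enable it, set the flag in your Helm values (a minimal sketch; the
chart then sets the ENABLE_PROVIDED_BY_TOPOLOGY environment variable
for the controller and node plugins):

    global:
      enableProvidedByTopology: true

After the node plugin is restarted with the flag set, the
`instance.hetzner.cloud/provided-by` topology key should show up in the
`topologyKeys` of the `csinode` objects, e.g. via
`kubectl get csinode -o yaml`.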
---
 chart/templates/controller/deployment.yaml |  4 +++
 chart/templates/core/storageclass.yaml     |  8 ++++++
 chart/templates/node/daemonset.yaml        |  4 +++
 chart/values.yaml                          |  9 ++++++
 cmd/aio/main.go                            |  4 +++
 cmd/controller/main.go                     |  3 ++
 cmd/node/main.go                           |  3 ++
 docs/kubernetes/README.md                  | 22 +++++++++++++--
 internal/app/app.go                        |  9 ++++++
 internal/driver/controller.go              | 31 +++++++++++++--------
 internal/driver/controller_test.go         |  8 ++++++
 internal/driver/driver.go                  |  1 +
 internal/driver/node.go                    | 32 ++++++++++++++--------
 internal/driver/node_test.go               |  1 +
 internal/driver/sanity_test.go             |  2 ++
 15 files changed, 115 insertions(+), 26 deletions(-)

diff --git a/chart/templates/controller/deployment.yaml b/chart/templates/controller/deployment.yaml
index 5d17825..f375d8c 100644
--- a/chart/templates/controller/deployment.yaml
+++ b/chart/templates/controller/deployment.yaml
@@ -167,6 +167,10 @@ spec:
                   key: {{ .Values.controller.hcloudToken.existingSecret.key }}
                   {{- end }}
             {{- end }}
+            {{- if .Values.global.enableProvidedByTopology }}
+            - name: ENABLE_PROVIDED_BY_TOPOLOGY
+              value: "true"
+            {{- end }}
             {{- if .Values.controller.extraEnvVars }}
             {{- include "common.tplvalues.render" (dict "value" .Values.controller.extraEnvVars "context" $) | nindent 12 }}
             {{- end }}
diff --git a/chart/templates/core/storageclass.yaml b/chart/templates/core/storageclass.yaml
index 1bc246d..f5f282c 100644
--- a/chart/templates/core/storageclass.yaml
+++ b/chart/templates/core/storageclass.yaml
@@ -1,3 +1,4 @@
+{{ $global := .Values.global }}
 {{- if .Values.storageClasses }}
 {{- range $key, $val := .Values.storageClasses }}
 kind: StorageClass
@@ -10,6 +11,13 @@ provisioner: csi.hetzner.cloud
 volumeBindingMode: WaitForFirstConsumer
 allowVolumeExpansion: true
 reclaimPolicy: {{ $val.reclaimPolicy | quote }}
+{{- if $global.enableProvidedByTopology }}
+allowedTopologies:
+- matchLabelExpressions:
+  - key: instance.hetzner.cloud/provided-by
+    values:
+    - "cloud"
+{{- end }}
 ---
 {{- end }}
 {{- end }}
\ No newline at end of file
diff --git a/chart/templates/node/daemonset.yaml b/chart/templates/node/daemonset.yaml
index ec1ee4c..cde83e3 100644
--- a/chart/templates/node/daemonset.yaml
+++ b/chart/templates/node/daemonset.yaml
@@ -121,6 +121,10 @@ spec:
             {{- end }}
             - name: ENABLE_METRICS
               value: {{if .Values.metrics.enabled}}"true"{{ else }}"false"{{end}}
+            {{- if .Values.global.enableProvidedByTopology }}
+            - name: ENABLE_PROVIDED_BY_TOPOLOGY
+              value: "true"
+            {{- end }}
             {{- if .Values.node.extraEnvVars }}
             {{- include "common.tplvalues.render" (dict "value" .Values.node.extraEnvVars "context" $) | nindent 12 }}
             {{- end }}
diff --git a/chart/values.yaml b/chart/values.yaml
index 481862f..bb6d548 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -11,6 +11,14 @@ global:
   ##   - myRegistryKeySecretName
   ##
   imagePullSecrets: []
+  ## @param global.enableProvidedByTopology Enables a workaround for an upstream Kubernetes issue where nodes without the CSI plugin are still considered for scheduling.
+  ## ref: https://github.com/kubernetes-csi/external-provisioner/issues/544
+  ## Warning: Once enabled, this feature cannot be easily disabled.
+  ## It automatically adds required nodeAffinities to each volume and the topology keys to `csinode` objects.
+  ## If the feature is later disabled, the topology keys are removed from the `csinode` objects, leaving volumes with required affinities that cannot be satisfied.
+  ## Note: After enabling this feature, the workaround for the Kubernetes upstream issue only works on newly created volumes, as old volumes are not updated with the required node affinity.
+  ##
+  enableProvidedByTopology: false
 
 ## @section Common parameters
 ##
@@ -648,6 +656,7 @@ node:
   ## kubeletDir: /var/lib/k0s/kubelet
   ## For microk8s:
   ## kubeletDir: /var/snap/microk8s/common/var/lib/kubelet
+  ##
   kubeletDir: /var/lib/kubelet
 
 ## @section Other Parameters
diff --git a/cmd/aio/main.go b/cmd/aio/main.go
index b610195..84abb09 100644
--- a/cmd/aio/main.go
+++ b/cmd/aio/main.go
@@ -63,10 +63,13 @@ func main() {
 	volumeResizeService := volumes.NewLinuxResizeService(logger.With("component", "linux-resize-service"))
 	volumeStatsService := volumes.NewLinuxStatsService(logger.With("component", "linux-stats-service"))
 
+	enableProvidedByTopology := app.GetEnableProvidedByTopology()
+
 	nodeService := driver.NewNodeService(
 		logger.With("component", "driver-node-service"),
 		strconv.FormatInt(serverID, 10),
 		serverLocation,
+		enableProvidedByTopology,
 		volumeMountService,
 		volumeResizeService,
 		volumeStatsService,
@@ -84,6 +87,7 @@ func main() {
 		logger.With("component", "driver-controller-service"),
 		volumeService,
 		serverLocation,
+		enableProvidedByTopology,
 	)
 
 	// common
diff --git a/cmd/controller/main.go b/cmd/controller/main.go
index 27a5c76..a29e6e2 100644
--- a/cmd/controller/main.go
+++ b/cmd/controller/main.go
@@ -54,6 +54,8 @@ func main() {
 		location = server.Datacenter.Location.Name
 	}
 
+	enableProvidedByTopology := app.GetEnableProvidedByTopology()
+
 	volumeService := volumes.NewIdempotentService(
 		logger.With("component", "idempotent-volume-service"),
 		api.NewVolumeService(
@@ -65,6 +67,7 @@ func main() {
 		logger.With("component", "driver-controller-service"),
 		volumeService,
 		location,
+		enableProvidedByTopology,
 	)
 
 	identityService := driver.NewIdentityService(
diff --git a/cmd/node/main.go b/cmd/node/main.go
index 772ec47..c3daed8 100644
--- a/cmd/node/main.go
+++ b/cmd/node/main.go
@@ -53,10 +53,13 @@ func main() {
 	volumeStatsService := volumes.NewLinuxStatsService(logger.With("component", "linux-stats-service"))
 	identityService := driver.NewIdentityService(logger.With("component", "driver-identity-service"))
 
+	enableProvidedByTopology := app.GetEnableProvidedByTopology()
+
 	nodeService := driver.NewNodeService(
 		logger.With("component", "driver-node-service"),
 		strconv.FormatInt(serverID, 10),
 		serverLocation,
+		enableProvidedByTopology,
 		volumeMountService,
 		volumeResizeService,
 		volumeStatsService,
diff --git a/docs/kubernetes/README.md b/docs/kubernetes/README.md
index fe85dc7..28ad2dd 100644
--- a/docs/kubernetes/README.md
+++ b/docs/kubernetes/README.md
@@ -209,9 +209,7 @@ $ kubectl apply -f https://raw.githubusercontent.com/hetznercloud/csi-driver/v2.
 
 ## Integration with Root Servers
 
-Root servers can be part of the cluster, but the CSI plugin doesn't work there.
-
-Labels are needed to indicate whether a node is a cloud VM or a dedicated server from Robot. If you are using the `hcloud-cloud-controller-manager` version 1.21.0 or later, these labels are added automatically. Otherwise, you will need to label the nodes manually.
+Root servers can be part of the cluster, but the CSI plugin doesn't work there. To ensure proper topology evaluation, labels are needed to indicate whether a node is a cloud VM or a dedicated server from Robot. If you are using the `hcloud-cloud-controller-manager` version 1.21.0 or later, these labels are added automatically. Otherwise, you will need to label the nodes manually.
 
 ### Adding labels manually
 
@@ -240,6 +238,24 @@ kubectl label nodes <node name> instance.hetzner.cloud/is-root-server=false
 kubectl label nodes <node name> instance.hetzner.cloud/is-root-server=true
 ```
 
+### Pods stuck in pending
+The current scheduler behavior can cause Pods to get stuck in `Pending` when the cluster also contains Robot servers.
+
+To address this behavior, you can set `enableProvidedByTopology` to `true` in the Helm Chart configuration. This setting prevents Pods from being scheduled on nodes where Hetzner volumes are unavailable, namely Robot servers. Enabling the option adds the `instance.hetzner.cloud/provided-by` label to the [allowed topologies](https://kubernetes.io/docs/concepts/storage/storage-classes/#allowed-topologies) section of the created storage classes. The label is also included in the `topologyKeys` section of the `csinode` objects, and a node affinity is set up for each persistent volume. This workaround does not work with the [old label](#deprecated-old-label).
+
+> [!WARNING]
+> Once enabled, this feature cannot be easily disabled. It automatically adds required nodeAffinities to each volume and the topology keys to `csinode` objects. If the feature is later disabled, the topology keys are removed from the `csinode` objects, leaving volumes with required affinities that cannot be satisfied.
+
+> [!NOTE]
+> After enabling this feature, the workaround for the Kubernetes upstream issue only works on newly created volumes, as old volumes are not updated with the required node affinity.
+
+```yaml
+global:
+  enableProvidedByTopology: true
+```
+
+Further information on the upstream issue can be found in [kubernetes-csi/external-provisioner#544](https://github.com/kubernetes-csi/external-provisioner/issues/544).
+
 ## Versioning policy
 
 We aim to support the latest three versions of Kubernetes. When a Kubernetes
diff --git a/internal/app/app.go b/internal/app/app.go
index 7118240..9051907 100644
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -49,6 +49,15 @@ func CreateLogger() *slog.Logger {
 	return logger
 }
 
+// GetEnableProvidedByTopology reports whether the ENABLE_PROVIDED_BY_TOPOLOGY environment variable is set to a true value; it defaults to false if the variable is unset or cannot be parsed.
+func GetEnableProvidedByTopology() bool {
+	var enableProvidedByTopology bool
+	if featFlag, exists := os.LookupEnv("ENABLE_PROVIDED_BY_TOPOLOGY"); exists {
+		enableProvidedByTopology, _ = strconv.ParseBool(featFlag)
+	}
+	return enableProvidedByTopology
+}
+
 // CreateListener creates and binds the unix socket in location specified by the CSI_ENDPOINT environment variable.
 func CreateListener() (net.Listener, error) {
 	endpoint := os.Getenv("CSI_ENDPOINT")
diff --git a/internal/driver/controller.go b/internal/driver/controller.go
index d757214..2591a92 100644
--- a/internal/driver/controller.go
+++ b/internal/driver/controller.go
@@ -18,20 +18,23 @@ import (
 type ControllerService struct {
 	proto.UnimplementedControllerServer
 
-	logger        *slog.Logger
-	volumeService volumes.Service
-	location      string
+	logger                   *slog.Logger
+	volumeService            volumes.Service
+	location                 string
+	enableProvidedByTopology bool
 }
 
 func NewControllerService(
 	logger *slog.Logger,
 	volumeService volumes.Service,
 	location string,
+	enableProvidedByTopology bool,
 ) *ControllerService {
 	return &ControllerService{
-		logger:        logger,
-		volumeService: volumeService,
-		location:      location,
+		logger:                   logger,
+		volumeService:            volumeService,
+		location:                 location,
+		enableProvidedByTopology: enableProvidedByTopology,
 	}
 }
 
@@ -88,16 +91,22 @@ func (s *ControllerService) CreateVolume(ctx context.Context, req *proto.CreateV
 		"volume-name", volume.Name,
 	)
 
+	topology := &proto.Topology{
+		Segments: map[string]string{
+			TopologySegmentLocation: volume.Location,
+		},
+	}
+
+	if s.enableProvidedByTopology {
+		topology.Segments[ProvidedByLabel] = "cloud"
+	}
+
 	resp := &proto.CreateVolumeResponse{
 		Volume: &proto.Volume{
 			VolumeId:      strconv.FormatInt(volume.ID, 10),
 			CapacityBytes: volume.SizeBytes(),
 			AccessibleTopology: []*proto.Topology{
-				{
-					Segments: map[string]string{
-						TopologySegmentLocation: volume.Location,
-					},
-				},
+				topology,
 			},
 			VolumeContext: map[string]string{
 				"fsFormatOptions": req.Parameters["fsFormatOptions"],
diff --git a/internal/driver/controller_test.go b/internal/driver/controller_test.go
index 8b78171..d1f7c0b 100644
--- a/internal/driver/controller_test.go
+++ b/internal/driver/controller_test.go
@@ -33,6 +33,7 @@ func newControllerServiceTestEnv() *controllerServiceTestEnv {
 			logger,
 			volumeService,
 			"testloc",
+			false,
 		),
 		volumeService: volumeService,
 	}
@@ -41,6 +42,8 @@ func newControllerServiceTestEnv() *controllerServiceTestEnv {
 func TestControllerServiceCreateVolume(t *testing.T) {
 	env := newControllerServiceTestEnv()
 
+	env.service.enableProvidedByTopology = true
+
 	env.volumeService.CreateFunc = func(ctx context.Context, opts volumes.CreateOpts) (*csi.Volume, error) {
 		if opts.Name != "testvol" {
 			t.Errorf("unexpected name passed to volume service: %s", opts.Name)
@@ -94,6 +97,11 @@ func TestControllerServiceCreateVolume(t *testing.T) {
 		if loc := top.Segments[TopologySegmentLocation]; loc != "testloc" {
 			t.Errorf("unexpected location segment in topology: %s", loc)
 		}
+		if env.service.enableProvidedByTopology {
+			if provider := top.Segments[ProvidedByLabel]; provider != "cloud" {
+				t.Errorf("unexpected provider segment in topology: %s", provider)
+			}
+		}
 	} else {
 		t.Errorf("unexpected number of topologies: %d", len(resp.Volume.AccessibleTopology))
 	}
diff --git a/internal/driver/driver.go b/internal/driver/driver.go
index 8fbcd0f..ce7d8c2 100644
--- a/internal/driver/driver.go
+++ b/internal/driver/driver.go
@@ -9,4 +9,5 @@ const (
 	DefaultVolumeSize = MinVolumeSize
 
 	TopologySegmentLocation = PluginName + "/location"
+	ProvidedByLabel         = "instance.hetzner.cloud/provided-by"
 )
diff --git a/internal/driver/node.go b/internal/driver/node.go
index 5b084ea..0e03ee4 100644
--- a/internal/driver/node.go
+++ b/internal/driver/node.go
@@ -15,29 +15,32 @@ import (
 type NodeService struct {
 	proto.UnimplementedNodeServer
 
-	logger              *slog.Logger
-	serverID            string
-	serverLocation      string
-	volumeMountService  volumes.MountService
-	volumeResizeService volumes.ResizeService
-	volumeStatsService  volumes.StatsService
+	logger                   *slog.Logger
+	serverID                 string
+	serverLocation           string
+	enableProvidedByTopology bool
+	volumeMountService       volumes.MountService
+	volumeResizeService      volumes.ResizeService
+	volumeStatsService       volumes.StatsService
 }
 
 func NewNodeService(
 	logger *slog.Logger,
 	serverID string,
 	serverLocation string,
+	enableProvidedByTopology bool,
 	volumeMountService volumes.MountService,
 	volumeResizeService volumes.ResizeService,
 	volumeStatsService volumes.StatsService,
 ) *NodeService {
 	return &NodeService{
-		logger:              logger,
-		serverID:            serverID,
-		serverLocation:      serverLocation,
-		volumeMountService:  volumeMountService,
-		volumeResizeService: volumeResizeService,
-		volumeStatsService:  volumeStatsService,
+		logger:                   logger,
+		serverID:                 serverID,
+		serverLocation:           serverLocation,
+		enableProvidedByTopology: enableProvidedByTopology,
+		volumeMountService:       volumeMountService,
+		volumeResizeService:      volumeResizeService,
+		volumeStatsService:       volumeStatsService,
 	}
 }
 
@@ -181,6 +184,11 @@ func (s *NodeService) NodeGetInfo(_ context.Context, _ *proto.NodeGetInfoRequest
 			},
 		},
 	}
+
+	if s.enableProvidedByTopology {
+		resp.AccessibleTopology.Segments[ProvidedByLabel] = "cloud"
+	}
+
 	return resp, nil
 }
 
diff --git a/internal/driver/node_test.go b/internal/driver/node_test.go
index 624c2af..22a1982 100644
--- a/internal/driver/node_test.go
+++ b/internal/driver/node_test.go
@@ -35,6 +35,7 @@ func newNodeServerTestEnv() nodeServiceTestEnv {
 			testutil.NewNopLogger(),
 			"1",
 			"loc",
+			false,
 			volumeMountService,
 			volumeResizeService,
 			volumeStatsService,
diff --git a/internal/driver/sanity_test.go b/internal/driver/sanity_test.go
index 4309430..4b34609 100644
--- a/internal/driver/sanity_test.go
+++ b/internal/driver/sanity_test.go
@@ -46,6 +46,7 @@ func TestSanity(t *testing.T) {
 		logger.With("component", "driver-controller-service"),
 		volumeService,
 		"testloc",
+		false,
 	)
 
 	identityService := NewIdentityService(
@@ -56,6 +57,7 @@ func TestSanity(t *testing.T) {
 		logger.With("component", "driver-node-service"),
 		"123456",
 		"loc",
+		false,
 		volumeMountService,
 		volumeResizeService,
 		volumeStatsService,
-- 
GitLab