diff --git a/cmd/controller/main.go b/cmd/controller/main.go index a29e6e26248f03252d827cae1448042ebcd5f584..9816d441fcaab608c1450350d530709bb56c9071 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -30,7 +30,6 @@ func main() { } var location string - if s := os.Getenv("HCLOUD_VOLUME_DEFAULT_LOCATION"); s != "" { location = s } else { @@ -42,7 +41,7 @@ func main() { "You can set HCLOUD_VOLUME_DEFAULT_LOCATION if you want to run it somewhere else.") } - server, err := app.GetServer(logger, hcloudClient, metadataClient) + location, err = app.GetServerLocation(logger, hcloudClient, metadataClient) if err != nil { logger.Error( "failed to fetch server", @@ -50,8 +49,16 @@ func main() { ) os.Exit(1) } + } - location = server.Datacenter.Location.Name + logger.Debug( + "evaluated default location for volumes", + "location", location, + ) + + if location == "" { + logger.Error("could not set a default location for volumes") + os.Exit(1) } enableProvidedByTopology := app.GetEnableProvidedByTopology() diff --git a/docs/kubernetes/README.md b/docs/kubernetes/README.md index 28ad2dda962935bf42483e037e04ad343dd700d9..7104b8e863d03960a5863c1358194723ee4b9400 100644 --- a/docs/kubernetes/README.md +++ b/docs/kubernetes/README.md @@ -143,6 +143,15 @@ When using XFS as the filesystem type and no `fsFormatOptions` are set, we apply If you set any options at all, it is your responsible to make sure that all default flags from `mkfs.xfs` are supported on your current Linux Kernel version or that you set the flags appropriately. +### Volume Location + +During the initialization of the CSI controller, the default location for all volumes is determined based on the following prioritized methods (evaluated in order from 1 to 4). However, when `volumeBindingMode: WaitForFirstConsumer` is used, the volume's location is determined by the node where the Pod is scheduled, and the default location is not applicable. 
For more details, refer to the official [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/#volume-binding-mode). + +1. The location is explicitly set using the `HCLOUD_VOLUME_DEFAULT_LOCATION` environment variable. +2. The location is derived by querying a server specified by the `HCLOUD_SERVER_ID` environment variable. +3. If neither of the above is set, the `KUBE_NODE_NAME` environment variable is used; by default it is set to the name of the node where the CSI controller is scheduled. This node name is then used to query the Hetzner API for a matching server and its location. +4. As a final fallback, the [Hetzner metadata service](https://docs.hetzner.cloud/#server-metadata) is queried for the availability zone of the server, and the location is derived from it. + ## Upgrading To upgrade the csi-driver version, you just need to apply the new manifests to your cluster. diff --git a/internal/app/app.go b/internal/app/app.go index 9051907a1a35b96d9240b79471ee74657d26f119..e0df7bd496488070acaeb077f83d5a61cc204441 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -167,69 +167,97 @@ func CreateHcloudClient(metricsRegistry *prometheus.Registry, logger *slog.Logge return hcloud.NewClient(opts...), nil } -// GetServer retrieves the hcloud server the application is running on. -func GetServer(logger *slog.Logger, hcloudClient *hcloud.Client, metadataClient *metadata.Client) (*hcloud.Server, error) { - hcloudServerID, err := getServerID(logger, hcloudClient, metadataClient) +// GetServerLocation retrieves the location of the hcloud server the application is running on. 
+func GetServerLocation(logger *slog.Logger, hcloudClient *hcloud.Client, metadataClient *metadata.Client) (string, error) { + // Option 1: Get from HCLOUD_SERVER_ID env + // This env would be set explicitly by the user + // If this is set and location can not be found we do not want a fallback + isSet, location, err := getLocationByEnvID(logger, hcloudClient) + if isSet { + return location, err + } + + // Option 2: Get from node name and search server list + // This env is set by default via a fieldRef on spec.nodeName + // If this is set and server can not be found we fallback to the metadata fallback + location, err = getLocationByEnvNodeName(logger, hcloudClient) if err != nil { - return nil, err + return "", err } - logger.Debug("fetching server") - server, _, err := hcloudClient.Server.GetByID(context.Background(), hcloudServerID) + if location != "" { + return location, nil + } + + // Option 3: Metadata service as fallback + return getLocationFromMetadata(logger, metadataClient) +} + +func getLocationByEnvID(logger *slog.Logger, hcloudClient *hcloud.Client) (bool, string, error) { + envID := os.Getenv("HCLOUD_SERVER_ID") + if envID == "" { + return false, "", nil + } + + id, err := strconv.ParseInt(envID, 10, 64) if err != nil { - return nil, err + return true, "", fmt.Errorf("invalid server id in HCLOUD_SERVER_ID env var: %s", envID) } - // Cover potential cases where the server is not found. This results in a - // nil server object and nil error. If we do not do this, we will panic - // when trying to log the server.Name. 
+ logger.Debug( + "using server id from HCLOUD_SERVER_ID env var", + "server-id", id, + ) + + server, _, err := hcloudClient.Server.GetByID(context.Background(), id) + if err != nil { + return true, "", err + } if server == nil { - return nil, errors.New("could not determine server") + return true, "", fmt.Errorf("HCLOUD_SERVER_ID is set to %d, but no server could be found", id) } - logger.Info("fetched server", "server-name", server.Name) - - return server, nil + return true, server.Datacenter.Location.Name, nil } -func getServerID(logger *slog.Logger, hcloudClient *hcloud.Client, metadataClient *metadata.Client) (int64, error) { - if s := os.Getenv("HCLOUD_SERVER_ID"); s != "" { - id, err := strconv.ParseInt(s, 10, 64) - if err != nil { - return 0, fmt.Errorf("invalid server id in HCLOUD_SERVER_ID env var: %s", err) - } - logger.Debug( - "using server id from HCLOUD_SERVER_ID env var", - "server-id", id, - ) - return id, nil +func getLocationByEnvNodeName(logger *slog.Logger, hcloudClient *hcloud.Client) (string, error) { + nodeName := os.Getenv("KUBE_NODE_NAME") + if nodeName == "" { + return "", nil } - if s := os.Getenv("KUBE_NODE_NAME"); s != "" { - server, _, err := hcloudClient.Server.GetByName(context.Background(), s) - if err != nil { - return 0, fmt.Errorf("error while getting server through node name: %s", err) - } - if server != nil { - logger.Debug( - "using server name from KUBE_NODE_NAME env var", - "server-id", server.ID, - ) - return server.ID, nil - } + server, _, err := hcloudClient.Server.GetByName(context.Background(), nodeName) + if err != nil { + return "", fmt.Errorf("error while getting server through node name: %s", err) + } + if server != nil { logger.Debug( - "server not found by name, fallback to metadata service", - "err", err, + "fetched server via server name from KUBE_NODE_NAME env var", + "server-id", server.ID, ) + return server.Datacenter.Location.Name, nil } - logger.Debug( - "getting instance id from metadata service", + 
logger.Info( + "KUBE_NODE_NAME is set, but no server could be found", + "KUBE_NODE_NAME", nodeName, ) - id, err := metadataClient.InstanceID() + + return "", nil +} + +func getLocationFromMetadata(logger *slog.Logger, metadataClient *metadata.Client) (string, error) { + logger.Debug("getting location from metadata service") + availabilityZone, err := metadataClient.AvailabilityZone() if err != nil { - return 0, fmt.Errorf("failed to get instance id from metadata service: %s", err) + return "", fmt.Errorf("failed to get location from metadata service: %s", err) + } + + parts := strings.Split(availabilityZone, "-") + if len(parts) != 2 { + return "", fmt.Errorf("availability zone from metadata service is not in the correct format, got: %s", availabilityZone) } - return id, nil + + return parts[0], nil } func CreateGRPCServer(logger *slog.Logger, metricsInterceptor grpc.UnaryServerInterceptor) *grpc.Server {