diff --git a/.github/workflows/test_e2e.yml b/.github/workflows/test_e2e.yml index 4e90eed661760f5c895e55c1275584e2cad8f71e..0379944f54b9960ce9868931517ae03f9a86d1e0 100644 --- a/.github/workflows/test_e2e.yml +++ b/.github/workflows/test_e2e.yml @@ -1,11 +1,11 @@ -name: Run e2e tests +name: e2e tests on: pull_request: {} push: branches: [main] jobs: - k3s: - name: k3s ${{ matrix.k3s }} + cloud: + name: Cloud ${{ matrix.k3s }} permissions: id-token: write runs-on: ubuntu-latest @@ -51,6 +51,7 @@ jobs: # make exported env variables available to following jobs echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV" echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV" + echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV" - name: Build and Deploy HCCM run: | @@ -102,3 +103,127 @@ jobs: with: name: debug-logs-${{ env.SCOPE }} path: debug-logs/ + + robot: + name: Robot + permissions: + id-token: write + + # Make sure that only one Job is using the server at a time + concurrency: robot-test-server + environment: e2e-robot + + env: + K3S_CHANNEL: v1.28 + SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-robot + + # Disable routes in dev-env, not supported for Robot. 
+ ROUTES_ENABLED: "false" + ROBOT_ENABLED: "true" + SERVER_NUMBER: ${{ vars.SERVER_NUMBER }} + + runs-on: ubuntu-latest + steps: + - uses: actions/setup-go@v4 + with: + go-version: "1.21" + - uses: actions/checkout@master + - uses: hetznercloud/tps-action@main + with: + token: ${{ secrets.HCLOUD_TOKEN }} + - uses: 3bit/setup-hcloud@v2 + - uses: yokawasa/action-setup-kube-tools@v0.9.3 + with: + setup-tools: | + helm + kubectl + skaffold + helm: v3.11.2 + kubectl: v1.28.1 + skaffold: v2.3.0 + - name: Install k3sup + run: | + curl -sLS https://get.k3sup.dev | sh + + - name: Setup test environment + env: + ROBOT_USER: ${{ secrets.ROBOT_USER }} + ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }} + run: | + source <(hack/dev-up.sh) + + # make exported env variables available to following steps + echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV" + echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV" + echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV" + + - name: Build and Deploy HCCM + run: | + skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" + tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}") + skaffold deploy \ + --profile=robot \ + --images=hetznercloud/hcloud-cloud-controller-manager=$tag + + - name: Setup Robot Server + env: + ROBOT_USER: ${{ secrets.ROBOT_USER }} + ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }} + + # Nicer output + PY_COLORS: true + ANSIBLE_FORCE_COLOR: true + working-directory: hack/robot-e2e + run: | + ansible-galaxy install -r requirements.yml + echo "::group::ansible-playbook e2e-setup-robot-server.yml" + ansible-playbook e2e-setup-robot-server.yml -e scope=$SCOPE -e server_number=$SERVER_NUMBER -vvv + echo "::endgroup::" + + - name: Run tests + env: + ROBOT_USER: ${{ secrets.ROBOT_USER }} + ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }} + run: | + go test ./tests/e2e -tags e2e,robot -v -timeout 60m + + - name: Download logs & events + if: always() 
+ continue-on-error: true + run: | + mkdir debug-logs + kubectl logs \ + --namespace kube-system \ + --selector app.kubernetes.io/name=hcloud-cloud-controller-manager \ + --all-containers \ + --prefix=true \ + --tail=-1 \ + > debug-logs/hccm.log + + kubectl get events \ + --all-namespaces \ + --sort-by=.firstTimestamp \ + --output yaml \ + > debug-logs/events.yaml + + - name: Show HCCM Logs on Failure + if: failure() + continue-on-error: true + run: | + echo "::group::hccm.log" + cat debug-logs/hccm.log + echo "::endgroup::" + + - name: Cleanup test environment + if: always() + continue-on-error: true + run: | + hack/dev-down.sh + + - name: Persist debug artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@v3 + with: + name: debug-logs-${{ env.SCOPE }} + path: debug-logs/ diff --git a/.golangci.yaml b/.golangci.yaml index 5e28abcee98772f1e4a64c3f50e1f90b6840a07b..bc44f45d8c4913c3be89aa3739ff8f8dc8b1b772 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -19,6 +19,11 @@ linters-settings: - pkg: k8s.io/apimachinery/pkg/apis/meta/v1 alias: metav1 + - pkg: github.com/syself/hrobot-go + alias: hrobot + - pkg: github.com/syself/hrobot-go/models + alias: hrobotmodels + misspell: locale: "US" @@ -58,3 +63,4 @@ issues: - path: internal/mocks linters: - unparam + - revive diff --git a/README.md b/README.md index 40f79f9750a75b466831b8ec5f170b663bf378c5..305a604e0a18b3f49997068f941ac83b2ffa2770 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,15 @@ alias kgp="kubectl get pods" alias kgs="kubectl get services" ``` +The test suite is split into three parts: + +- **General Part**: Sets up the test env & checks if the HCCM Pod is properly running + - Build Tag: `e2e` +- **Cloud Part**: Tests regular functionality against a Cloud-only environment + - Build Tag: `e2e && !robot` +- **Robot Part**: Tests Robot functionality against a Cloud+Robot environment + - Build Tag: `e2e && robot` + ## Local test setup This repository provides 
[skaffold](https://skaffold.dev/) to easily deploy / debug this controller on demand diff --git a/chart/templates/daemonset.yaml b/chart/templates/daemonset.yaml index c6cfa438e479eb78fe33cddb1c03b6660e152595..12242b8b40af2af2ee07df92844f910ec18ae8b7 100644 --- a/chart/templates/daemonset.yaml +++ b/chart/templates/daemonset.yaml @@ -73,6 +73,10 @@ spec: - name: HCLOUD_METRICS_ENABLED value: "false" {{- end }} + {{- if $.Values.robot.enabled }} + - name: ROBOT_ENABLED + value: "true" + {{- end }} image: {{ $.Values.image.repository }}:{{ tpl $.Values.image.tag . }} # x-release-please-version ports: {{- if $.Values.monitoring.enabled }} diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml index 9b39671e763a32b91b155bfc740b454703ae87ec..b59daa2a89b7c5e197e5f7aa86de27f73c6635f7 100644 --- a/chart/templates/deployment.yaml +++ b/chart/templates/deployment.yaml @@ -77,6 +77,10 @@ spec: - name: HCLOUD_METRICS_ENABLED value: "false" {{- end }} + {{- if $.Values.robot.enabled }} + - name: ROBOT_ENABLED + value: "true" + {{- end }} image: {{ $.Values.image.repository }}:{{ tpl $.Values.image.tag . 
}} # x-release-please-version ports: {{- if $.Values.monitoring.enabled }} diff --git a/chart/values.yaml b/chart/values.yaml index c7fc4518fa881cf57a27e87f349c0b3dae8a1d4d..f475dffcc14ff1896e5d711a5f62abc07796d3c0 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -24,12 +24,27 @@ env: - # The following two variables should *not* be set here: + # The following variables should *not* be set here: # HCLOUD_METRICS_ENABLED - see monitoring.enabled # HCLOUD_NETWORK - see networking.enabled + # ROBOT_ENABLED - see robot.enabled + HCLOUD_TOKEN: valueFrom: secretKeyRef: name: hcloud key: token + ROBOT_USER: + valueFrom: + secretKeyRef: + name: hcloud + key: robot-user + optional: true + ROBOT_PASSWORD: + valueFrom: + secretKeyRef: + name: hcloud + key: robot-password + optional: true + image: repository: hetznercloud/hcloud-cloud-controller-manager tag: '{{ $.Chart.Version }}' @@ -81,3 +96,7 @@ additionalTolerations: [] nodeSelector: {} # node-role.kubernetes.io/control-plane: "" + +robot: + # Set to true to enable support for Robot (Dedicated) servers. 
+ enabled: false diff --git a/deploy/ccm-networks.yaml b/deploy/ccm-networks.yaml index b227a2a36130463acdde384ab74c880caa4b3c66..40eef144d8da706386a9208e9ac6fdb56c5f9f01 100644 --- a/deploy/ccm-networks.yaml +++ b/deploy/ccm-networks.yaml @@ -75,6 +75,18 @@ spec: secretKeyRef: key: token name: hcloud + - name: ROBOT_PASSWORD + valueFrom: + secretKeyRef: + key: robot-password + name: hcloud + optional: true + - name: ROBOT_USER + valueFrom: + secretKeyRef: + key: robot-user + name: hcloud + optional: true - name: HCLOUD_NETWORK valueFrom: secretKeyRef: diff --git a/deploy/ccm.yaml b/deploy/ccm.yaml index 60c38d78c807cc6ca6b5bade94be9b54bcc630f6..e6e3c27e4725d26c8a200eb8c1e5b4379e1f3909 100644 --- a/deploy/ccm.yaml +++ b/deploy/ccm.yaml @@ -72,6 +72,18 @@ spec: secretKeyRef: key: token name: hcloud + - name: ROBOT_PASSWORD + valueFrom: + secretKeyRef: + key: robot-password + name: hcloud + optional: true + - name: ROBOT_USER + valueFrom: + secretKeyRef: + key: robot-user + name: hcloud + optional: true image: hetznercloud/hcloud-cloud-controller-manager:v1.18.0 # x-release-please-version ports: - name: metrics diff --git a/go.mod b/go.mod index 2769d875148b2869c15f386475453daa22bee34a..1af19b13c5ee18d2e07b83acdbde0fe174587056 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/prometheus/client_golang v1.17.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 + github.com/syself/hrobot-go v0.2.5 k8s.io/api v0.28.4 k8s.io/apimachinery v0.28.4 k8s.io/client-go v0.28.4 diff --git a/go.sum b/go.sum index 178d55712a1ee3342e3e5b6bef4ea9a65b8d990e..2faa769da1bf25aed82bfd50ecef1c5ecfeb8f37 100644 --- a/go.sum +++ b/go.sum @@ -108,10 +108,7 @@ github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeME github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod 
h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= -github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -301,6 +298,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/syself/hrobot-go v0.2.5 h1:Zs7GDFRd6fDn4YHYE9e5CGtRm6KYmMZwMMnm7OC/09g= +github.com/syself/hrobot-go v0.2.5/go.mod h1:Oy47yZs+fJKcSh38S3OiNJdY34MXb0pkk796UnpYBnc= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7otjonDflCTK0BCfls4SPy3NcCVb5dqqmbRknE= github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= @@ -667,44 +666,24 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod 
h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.28.3 h1:Gj1HtbSdB4P08C8rs9AR94MfSGpRhJgsS+GF9V26xMM= -k8s.io/api v0.28.3/go.mod h1:MRCV/jr1dW87/qJnZ57U5Pak65LGmQVkKTzf3AtKFHc= k8s.io/api v0.28.4 h1:8ZBrLjwosLl/NYgv1P7EQLqoO8MGQApnbgH8tu3BMzY= k8s.io/api v0.28.4/go.mod h1:axWTGrY88s/5YE+JSt4uUi6NMM+gur1en2REMR7IRj0= -k8s.io/apimachinery v0.28.3 h1:B1wYx8txOaCQG0HmYF6nbpU8dg6HvA06x5tEffvOe7A= -k8s.io/apimachinery v0.28.3/go.mod h1:uQTKmIqs+rAYaq+DFaoD2X7pcjLOqbQX2AOiO0nIpb8= k8s.io/apimachinery v0.28.4 h1:zOSJe1mc+GxuMnFzD4Z/U1wst50X28ZNsn5bhgIIao8= k8s.io/apimachinery v0.28.4/go.mod h1:wI37ncBvfAoswfq626yPTe6Bz1c22L7uaJ8dho83mgg= -k8s.io/apiserver v0.28.3 h1:8Ov47O1cMyeDzTXz0rwcfIIGAP/dP7L8rWbEljRcg5w= -k8s.io/apiserver v0.28.3/go.mod h1:YIpM+9wngNAv8Ctt0rHG4vQuX/I5rvkEMtZtsxW2rNM= k8s.io/apiserver v0.28.4 h1:BJXlaQbAU/RXYX2lRz+E1oPe3G3TKlozMMCZWu5GMgg= k8s.io/apiserver v0.28.4/go.mod h1:Idq71oXugKZoVGUUL2wgBCTHbUR+FYTWa4rq9j4n23w= -k8s.io/client-go v0.28.3 h1:2OqNb72ZuTZPKCl+4gTKvqao0AMOl9f3o2ijbAj3LI4= -k8s.io/client-go v0.28.3/go.mod h1:LTykbBp9gsA7SwqirlCXBWtK0guzfhpoW4qSm7i9dxo= k8s.io/client-go v0.28.4 h1:Np5ocjlZcTrkyRJ3+T3PkXDpe4UpatQxj85+xjaD2wY= k8s.io/client-go v0.28.4/go.mod h1:0VDZFpgoZfelyP5Wqu0/r/TRYcLYuJ2U1KEeoaPa1N4= -k8s.io/cloud-provider v0.28.3 h1:9u+JjA3zIn0nqLOOa8tWnprFkffguSAhfBvo8p7LhBQ= -k8s.io/cloud-provider v0.28.3/go.mod h1:shAJxdrKu+SwwGUhkodxByPjaH8KBFZqXo6jU1F0ehI= k8s.io/cloud-provider v0.28.4 h1:7obmeuJJ5CYTO9HANDqemf/d2v95U+F0t8aeH4jNOsQ= k8s.io/cloud-provider v0.28.4/go.mod h1:xbhmGZ7wRHgXFP3SNsvdmFRO87KJIvirDYQA5ydMgGA= -k8s.io/component-base v0.28.3 h1:rDy68eHKxq/80RiMb2Ld/tbH8uAE75JdCqJyi6lXMzI= -k8s.io/component-base v0.28.3/go.mod h1:fDJ6vpVNSk6cRo5wmDa6eKIG7UlIQkaFmZN2fYgIUD8= k8s.io/component-base v0.28.4 h1:c/iQLWPdUgI90O+T9TeECg8o7N3YJTiuz2sKxILYcYo= k8s.io/component-base v0.28.4/go.mod h1:m9hR0uvqXDybiGL2nf/3Lf0MerAfQXzkfWhUY58JUbU= -k8s.io/component-helpers v0.28.3 
h1:te9ieTGzcztVktUs92X53P6BamAoP73MK0qQP0WmDqc= -k8s.io/component-helpers v0.28.3/go.mod h1:oJR7I9ist5UAQ3y/CTdbw6CXxdMZ1Lw2Ua/EZEwnVLs= k8s.io/component-helpers v0.28.4 h1:+X9VXT5+jUsRdC26JyMZ8Fjfln7mSjgumafocE509C4= k8s.io/component-helpers v0.28.4/go.mod h1:8LzMalOQ0K10tkBJWBWq8h0HTI9HDPx4WT3QvTFn9Ro= -k8s.io/controller-manager v0.28.3 h1:2s0wBvrGuRwMYEnl5Ed+qkK1kAfZR6H+0Ut1R2tHLRg= -k8s.io/controller-manager v0.28.3/go.mod h1:lYu5hxBVmfK5NrpmeVrioPH4ROnE4OxmUM3xx6JWlLs= k8s.io/controller-manager v0.28.4 h1:8uJmo1pD6fWYk4mC/JfZQU6zPvuCgEHf3pd5G39ldDU= k8s.io/controller-manager v0.28.4/go.mod h1:pnO+UK2mcWNu1MxucqI8xHPD/8UBm04IUmp2u/3vbnM= -k8s.io/klog/v2 v2.100.1 h1:7WCHKK6K8fNhTqfBhISHQ97KrnJNFZMcQvKp7gP/tmg= -k8s.io/klog/v2 v2.100.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= -k8s.io/kms v0.28.3 h1:jYwwAe96XELNjYWv1G4kNzizcFoZ50OOElvPansbw70= -k8s.io/kms v0.28.3/go.mod h1:kSMjU2tg7vjqqoWVVCcmPmNZ/CofPsoTbSxAipCvZuE= k8s.io/kms v0.28.4 h1:PMgY/3CQTWP9eIKmNQiTgjLIZ0ns6O+voagzD2/4mSg= k8s.io/kms v0.28.4/go.mod h1:HL4/lR/bhjAJPbqycKtfhWiKh1Sp21cpHOL8P4oo87w= k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 h1:LyMgNKD2P8Wn1iAwQU5OhxCKlKJy0sHc+PcDwFB24dQ= diff --git a/hack/dev-up.sh b/hack/dev-up.sh index 921672c998e4b79373349eac3fa94d20787ca90d..b4596711f82500fd1844b1f48f8532d7871654f9 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -164,7 +164,17 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi # Create HCLOUD_TOKEN Secret for hcloud-cloud-controller-manager. ( trap error ERR if ! 
kubectl -n kube-system get secret hcloud >/dev/null 2>&1; then - kubectl -n kube-system create secret generic hcloud --from-literal="token=$HCLOUD_TOKEN" --from-literal="network=$scope_name" + data=( + --from-literal="token=$HCLOUD_TOKEN" + --from-literal="network=$scope_name" + ) + if [[ -v ROBOT_USER ]]; then + data+=( + --from-literal="robot-user=$ROBOT_USER" + --from-literal="robot-password=$ROBOT_PASSWORD" + ) + fi + kubectl -n kube-system create secret generic hcloud "${data[@]}" fi) & wait ) & @@ -172,8 +182,10 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi echo "Success - cluster fully initialized and ready, why not see for yourself?" echo '$ kubectl get nodes' kubectl get nodes + export CONTROL_IP=$(hcloud server ip "$scope_name-1") } >&2 echo "export KUBECONFIG=$KUBECONFIG" $SCRIPT_DIR/registry-port-forward.sh echo "export SKAFFOLD_DEFAULT_REPO=localhost:30666" +echo "export CONTROL_IP=$CONTROL_IP" diff --git a/hack/robot-e2e/ansible.cfg b/hack/robot-e2e/ansible.cfg new file mode 100644 index 0000000000000000000000000000000000000000..0fdad75d3f22f63d41674810876bb4e98e35bbf3 --- /dev/null +++ b/hack/robot-e2e/ansible.cfg @@ -0,0 +1,7 @@ +[defaults] +inventory = ${PWD}/inventory.yml +host_key_checking = False +stdout_callback = community.general.yaml + +[ssh_connection] +pipelining = True diff --git a/hack/robot-e2e/autosetup.j2 b/hack/robot-e2e/autosetup.j2 new file mode 100644 index 0000000000000000000000000000000000000000..473a629f652590b658f91a45896730d6af996cff --- /dev/null +++ b/hack/robot-e2e/autosetup.j2 @@ -0,0 +1,15 @@ +HOSTNAME {{ server_name }} + +DRIVE1 /dev/sda +DRIVE2 /dev/sdb + +# We do not care at all about data consistency/availability, as we reprovision for every test run +SWRAID 1 +SWRAIDLEVEL 0 + +BOOTLOADER grub + +PART /boot ext3 1024M +PART / ext4 all + +IMAGE /root/.oldroot/nfs/images/Ubuntu-2204-jammy-amd64-base.tar.gz diff --git a/hack/robot-e2e/e2e-setup-robot-server.yml b/hack/robot-e2e/e2e-setup-robot-server.yml new file mode 
100644 index 0000000000000000000000000000000000000000..4414965d1a1e4ae0bca7b4e8ee1d8b69f4cbfbb9 --- /dev/null +++ b/hack/robot-e2e/e2e-setup-robot-server.yml @@ -0,0 +1,101 @@ +--- +- name: Prepare Reinstall + hosts: localhost + connection: local + gather_facts: false + + vars: + scope: dev + # Additional SSH keys to add to the server for debugging. Must already exist in Robot. + authorized_keys: [] + + module_defaults: + group/community.hrobot.robot: + hetzner_user: "{{ lookup('ansible.builtin.env', 'ROBOT_USER') }}" + hetzner_password: "{{ lookup('ansible.builtin.env', 'ROBOT_PASSWORD') }}" + + tasks: + - name: Get Server Info + community.hrobot.server_info: + server_number: "{{ server_number }}" + register: server_info + + - name: Set Server Facts + ansible.builtin.set_fact: + server_ip: "{{ server_info.servers[0].server_ip }}" + server_name: "{{ server_info.servers[0].server_name }}" + + - name: Create SSH Key + community.hrobot.ssh_key: + name: "hccm-{{ scope }}" + public_key: "{{ lookup('file', '../.ssh-{{ scope }}.pub') }}" + state: present + register: ssh_key + + - name: Enable Rescue System + community.hrobot.boot: + server_number: "{{ server_number }}" + rescue: + authorized_keys: "{{ authorized_keys + [ ssh_key.fingerprint ] }}" + os: linux + + - name: Reset Server (to get to Rescue System) + community.hrobot.reset: + server_number: "{{ server_number }}" + reset_type: hardware # only type that does not require a separate reset for starting again + + - name: Wait for SSH + ansible.builtin.wait_for: + host: "{{ server_ip }}" + port: "{{ 22 }}" + search_regex: SSH + +- name: Install OS to Server + hosts: all + gather_facts: false + tasks: + - name: Write autosetup + ansible.builtin.template: + src: autosetup.j2 + dest: /autosetup + vars: + server_name: "{{ hostvars['localhost']['server_name'] }}" + + - name: installimage + # -t => Take over rescue system SSH public keys + ansible.builtin.command: /root/.oldroot/nfs/install/installimage -t yes + + - name: 
Reboot + ansible.builtin.reboot: + # 5 minutes should be enough for a reboot, and in case + # there is some issue, we can abort earlier. + reboot_timeout: 300 + + - name: Create k3s directory + ansible.builtin.file: + path: /etc/rancher/k3s + state: directory + + - name: Prepare Local Registry + ansible.builtin.copy: + src: ../k3s-registries.yaml + dest: /etc/rancher/k3s/registries.yaml + +- name: Join Kubernetes Cluster + hosts: localhost + connection: local + gather_facts: false + vars: + control_ip: "{{ lookup('ansible.builtin.env', 'CONTROL_IP') }}" + k3s_channel: stable + scope: dev + + tasks: + - name: k3sup + ansible.builtin.command: >- + k3sup join + --server-ip={{ control_ip | ansible.builtin.mandatory }} + --ip={{ server_ip }} + --k3s-channel={{ k3s_channel }} + --k3s-extra-args="--kubelet-arg cloud-provider=external --node-label instance.hetzner.cloud/is-root-server=true" + --ssh-key ../.ssh-{{ scope }} diff --git a/hack/robot-e2e/inventory.yml b/hack/robot-e2e/inventory.yml new file mode 100644 index 0000000000000000000000000000000000000000..c3f8c2f024eff0095fa9f6e5d5623eb6bb60783e --- /dev/null +++ b/hack/robot-e2e/inventory.yml @@ -0,0 +1,8 @@ +all: + hosts: + # TODO: Dynamic inventory + hccm-test: + ansible_host: 142.132.203.104 + ansible_user: root + ansible_ssh_private_key_file: ../.ssh-{{ scope }} + ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' diff --git a/hack/robot-e2e/requirements.yml b/hack/robot-e2e/requirements.yml new file mode 100644 index 0000000000000000000000000000000000000000..d2df60aa8b0f38634e51244937a5bf4f4a06f32d --- /dev/null +++ b/hack/robot-e2e/requirements.yml @@ -0,0 +1,3 @@ +collections: + - name: community.hrobot + - name: community.general \ No newline at end of file diff --git a/hcloud/cloud.go b/hcloud/cloud.go index 35c487fe3b67fe9ccd3774128a4d2696fd181ec8..bd467e6dd4981ce65cc69db7d5f2728b1cab6503 100644 --- a/hcloud/cloud.go +++ b/hcloud/cloud.go @@ -23,12 +23,14 @@ import ( 
"os" "strings" + hrobot "github.com/syself/hrobot-go" cloudprovider "k8s.io/cloud-provider" "k8s.io/klog/v2" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/config" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics" + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/robot" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/hetznercloud/hcloud-go/v2/hcloud/metadata" ) @@ -41,9 +43,10 @@ const ( var providerVersion = "unknown" type cloud struct { - client *hcloud.Client - cfg config.HCCMConfiguration - networkID int64 + client *hcloud.Client + robotClient robot.Client + cfg config.HCCMConfiguration + networkID int64 } func newCloud(_ io.Reader) (cloudprovider.Interface, error) { @@ -79,6 +82,12 @@ func newCloud(_ io.Reader) (cloudprovider.Interface, error) { client := hcloud.NewClient(opts...) metadataClient := metadata.NewClient() + var robotClient robot.Client + if cfg.Robot.Enabled { + c := hrobot.NewBasicAuthClient(cfg.Robot.User, cfg.Robot.Password) + robotClient = robot.NewClient(c, cfg.Robot.CacheTimeout) + } + var networkID int64 if cfg.Network.NameOrID != "" { n, _, err := client.Network.Get(context.Background(), cfg.Network.NameOrID) @@ -110,9 +119,10 @@ func newCloud(_ io.Reader) (cloudprovider.Interface, error) { klog.Infof("Hetzner Cloud k8s cloud controller %s started\n", providerVersion) return &cloud{ - client: client, - cfg: cfg, - networkID: networkID, + client: client, + robotClient: robotClient, + cfg: cfg, + networkID: networkID, }, nil } @@ -125,7 +135,7 @@ func (c *cloud) Instances() (cloudprovider.Instances, bool) { } func (c *cloud) InstancesV2() (cloudprovider.InstancesV2, bool) { - return newInstances(c.client, c.cfg.Instance.AddressFamily, c.networkID), true + return newInstances(c.client, c.robotClient, c.cfg.Instance.AddressFamily, c.networkID), true } func (c *cloud) Zones() (cloudprovider.Zones, 
bool) { @@ -140,6 +150,7 @@ func (c *cloud) LoadBalancer() (cloudprovider.LoadBalancer, bool) { lbOps := &hcops.LoadBalancerOps{ LBClient: &c.client.LoadBalancer, + RobotClient: c.robotClient, CertOps: &hcops.CertificateOps{CertClient: &c.client.Certificate}, ActionClient: &c.client.Action, NetworkClient: &c.client.Network, diff --git a/hcloud/cloud_test.go b/hcloud/cloud_test.go index 5dc7e1fe90f944f5a8eaab05909ce4024e5f04f9..5400b7c4652316bcdef28a5eddab07ef9c008650 100644 --- a/hcloud/cloud_test.go +++ b/hcloud/cloud_test.go @@ -26,6 +26,7 @@ import ( "time" "github.com/stretchr/testify/assert" + hrobot "github.com/syself/hrobot-go" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport" "github.com/hetznercloud/hcloud-go/v2/hcloud" @@ -33,9 +34,10 @@ import ( ) type testEnv struct { - Server *httptest.Server - Mux *http.ServeMux - Client *hcloud.Client + Server *httptest.Server + Mux *http.ServeMux + Client *hcloud.Client + RobotClient hrobot.RobotClient } func (env *testEnv) Teardown() { @@ -43,6 +45,7 @@ func (env *testEnv) Teardown() { env.Server = nil env.Mux = nil env.Client = nil + env.RobotClient = nil } func newTestEnv() testEnv { @@ -54,10 +57,13 @@ func newTestEnv() testEnv { hcloud.WithBackoffFunc(func(_ int) time.Duration { return 0 }), hcloud.WithDebugWriter(os.Stdout), ) + robotClient := hrobot.NewBasicAuthClient("", "") + robotClient.SetBaseURL(server.URL + "/robot") return testEnv{ - Server: server, - Mux: mux, - Client: client, + Server: server, + Mux: mux, + Client: client, + RobotClient: robotClient, } } @@ -131,13 +137,15 @@ func TestCloud(t *testing.T) { "HCLOUD_ENDPOINT", env.Server.URL, "HCLOUD_TOKEN", "jr5g7ZHpPptyhJzZyHw2Pqu4g9gTqDvEceYpngPf79jN_NOT_VALID_dzhepnahq", "HCLOUD_METRICS_ENABLED", "false", + "ROBOT_USER", "user", + "ROBOT_PASSWORD", "pass123", ) defer resetEnv() env.Mux.HandleFunc("/servers", func(w http.ResponseWriter, r *http.Request) { json.NewEncoder(w).Encode( schema.ServerListResponse{ Servers: 
[]schema.Server{ - schema.Server{ + { ID: 1, Name: "test", Status: "running", @@ -232,6 +240,8 @@ func TestCloud(t *testing.T) { "HCLOUD_NETWORK", "1", "HCLOUD_NETWORK_DISABLE_ATTACHED_CHECK", "true", "HCLOUD_METRICS_ENABLED", "false", + "ROBOT_USER", "", + "ROBOT_PASSWORD", "", ) defer resetEnv() diff --git a/hcloud/instances.go b/hcloud/instances.go index 877cbfd1e94688ba2f2ff727f6a38c086d6b336b..3148d6b3127d04e721aa2760eb6ba4ac72c4cc94 100644 --- a/hcloud/instances.go +++ b/hcloud/instances.go @@ -18,50 +18,107 @@ package hcloud import ( "context" + "errors" "fmt" + hrobotmodels "github.com/syself/hrobot-go/models" corev1 "k8s.io/api/core/v1" cloudprovider "k8s.io/cloud-provider" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/config" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/providerid" + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/robot" "github.com/hetznercloud/hcloud-go/v2/hcloud" ) type instances struct { client *hcloud.Client + robotClient robot.Client addressFamily config.AddressFamily networkID int64 } -func newInstances(client *hcloud.Client, addressFamily config.AddressFamily, networkID int64) *instances { - return &instances{client, addressFamily, networkID} +var ( + errServerNotFound = errors.New("server not found") + errMissingRobotClient = errors.New("no robot client configured, make sure to enable Robot support in the configuration") +) + +func newInstances(client *hcloud.Client, robotClient robot.Client, addressFamily config.AddressFamily, networkID int64) *instances { + return &instances{client, robotClient, addressFamily, networkID} } -// lookupServer attempts to locate the corresponding hcloud.Server for a given corev1.Node +// lookupServer attempts to locate the corresponding [*hcloud.Server] or [*hrobotmodels.Server] for a given [*corev1.Node]. 
// It returns an error if the Node has an invalid provider ID or if API requests failed. -// It can return a nil [*hcloud.Server] if neither the ProviderID nor the Name matches an existing server. -func (i *instances) lookupServer(ctx context.Context, node *corev1.Node) (*hcloud.Server, error) { - var server *hcloud.Server +// It can return nil server if neither the ProviderID nor the Name matches an existing server. +func (i *instances) lookupServer( + ctx context.Context, + node *corev1.Node, +) (genericServer, error) { if node.Spec.ProviderID != "" { - serverID, err := providerid.ToServerID(node.Spec.ProviderID) + var serverID int64 + serverID, isCloudServer, err := providerid.ToServerID(node.Spec.ProviderID) + if err != nil { return nil, fmt.Errorf("failed to convert provider id to server id: %w", err) } - server, _, err = i.client.Server.GetByID(ctx, serverID) + if isCloudServer { + server, err := getCloudServerByID(ctx, i.client, serverID) + if err != nil { + return nil, fmt.Errorf("failed to get hcloud server \"%d\": %w", serverID, err) + } + + if server == nil { + return nil, nil + } + + return hcloudServer{server}, nil + } + + if i.robotClient == nil { + return nil, errMissingRobotClient + } + server, err := getRobotServerByID(i.robotClient, int(serverID), node) if err != nil { - return nil, fmt.Errorf("failed to lookup server \"%d\": %w", serverID, err) + return nil, fmt.Errorf("failed to get robot server \"%d\": %w", serverID, err) + } + + if server == nil { + return nil, nil } - } else { - var err error - server, _, err = i.client.Server.GetByName(ctx, node.Name) + + return robotServer{server, i.robotClient}, nil + } + + // If the node has no provider ID we try to find the server by name from + // both sources. In case we find two servers, we return an error. 
+ cloudServer, err := getCloudServerByName(ctx, i.client, node.Name) + if err != nil { + return nil, fmt.Errorf("failed to get hcloud server %q: %w", node.Name, err) + } + + var hrobotServer *hrobotmodels.Server + if i.robotClient != nil { + hrobotServer, err = getRobotServerByName(i.robotClient, node) if err != nil { - return nil, fmt.Errorf("failed to lookup server \"%s\": %w", node.Name, err) + return nil, fmt.Errorf("failed to get robot server %q: %w", node.Name, err) } } - return server, nil + + if cloudServer != nil && hrobotServer != nil { + return nil, fmt.Errorf("found both a cloud & robot server for name %q", node.Name) + } + + switch { + case cloudServer != nil: + return hcloudServer{cloudServer}, nil + case hrobotServer != nil: + return robotServer{hrobotServer, i.robotClient}, nil + default: + // Both nil + return nil, nil + } } func (i *instances) InstanceExists(ctx context.Context, node *corev1.Node) (bool, error) { @@ -70,7 +127,7 @@ func (i *instances) InstanceExists(ctx context.Context, node *corev1.Node) (bool server, err := i.lookupServer(ctx, node) if err != nil { - return false, err + return false, fmt.Errorf("%s: %w", op, err) } return server != nil, nil @@ -82,13 +139,21 @@ func (i *instances) InstanceShutdown(ctx context.Context, node *corev1.Node) (bo server, err := i.lookupServer(ctx, node) if err != nil { - return false, err + return false, fmt.Errorf("%s: %w", op, err) } + if server == nil { - return false, fmt.Errorf("failed to find server status: no matching server found for node '%s'", node.Name) + return false, fmt.Errorf( + "%s: failed to get instance metadata: no matching server found for node '%s': %w", + op, node.Name, errServerNotFound) + } + + isShutdown, err := server.IsShutdown() + if err != nil { + return false, fmt.Errorf("%s: %w", op, err) } - return server.Status == hcloud.ServerStatusOff, nil + return isShutdown, nil } func (i *instances) InstanceMetadata(ctx context.Context, node *corev1.Node) 
(*cloudprovider.InstanceMetadata, error) { @@ -97,22 +162,24 @@ func (i *instances) InstanceMetadata(ctx context.Context, node *corev1.Node) (*c server, err := i.lookupServer(ctx, node) if err != nil { - return nil, err + return nil, fmt.Errorf("%s: %w", op, err) } + if server == nil { - return nil, fmt.Errorf("failed to get instance metadata: no matching server found for node '%s'", node.Name) + return nil, fmt.Errorf( + "%s: failed to get instance metadata: no matching server found for node '%s': %w", + op, node.Name, errServerNotFound) } - return &cloudprovider.InstanceMetadata{ - ProviderID: providerid.FromServerID(server.ID), - InstanceType: server.ServerType.Name, - NodeAddresses: nodeAddresses(i.addressFamily, i.networkID, server), - Zone: server.Datacenter.Name, - Region: server.Datacenter.Location.Name, - }, nil + metadata, err := server.Metadata(i.addressFamily, i.networkID) + if err != nil { + return nil, fmt.Errorf("%s: %w", op, err) + } + + return metadata, nil } -func nodeAddresses(addressFamily config.AddressFamily, networkID int64, server *hcloud.Server) []corev1.NodeAddress { +func hcloudNodeAddresses(addressFamily config.AddressFamily, networkID int64, server *hcloud.Server) []corev1.NodeAddress { var addresses []corev1.NodeAddress addresses = append( addresses, @@ -154,3 +221,80 @@ func nodeAddresses(addressFamily config.AddressFamily, networkID int64, server * } return addresses } + +func robotNodeAddresses(addressFamily config.AddressFamily, server *hrobotmodels.Server) []corev1.NodeAddress { + var addresses []corev1.NodeAddress + addresses = append( + addresses, + corev1.NodeAddress{Type: corev1.NodeHostName, Address: server.Name}, + ) + + if addressFamily == config.AddressFamilyIPv6 || addressFamily == config.AddressFamilyDualStack { + // For a given IPv6 network of 2a01:f48:111:4221::, the instance address is 2a01:f48:111:4221::1 + hostAddress := server.ServerIPv6Net + hostAddress += "1" + + addresses = append( + addresses, + 
corev1.NodeAddress{Type: corev1.NodeExternalIP, Address: hostAddress}, + ) + } + + if addressFamily == config.AddressFamilyIPv4 || addressFamily == config.AddressFamilyDualStack { + addresses = append( + addresses, + corev1.NodeAddress{Type: corev1.NodeExternalIP, Address: server.ServerIP}, + ) + } + + return addresses +} + +type genericServer interface { + IsShutdown() (bool, error) + Metadata(addressFamily config.AddressFamily, networkID int64) (*cloudprovider.InstanceMetadata, error) +} + +type hcloudServer struct { + *hcloud.Server +} + +func (s hcloudServer) IsShutdown() (bool, error) { + return s.Status == hcloud.ServerStatusOff, nil +} + +func (s hcloudServer) Metadata(addressFamily config.AddressFamily, networkID int64) (*cloudprovider.InstanceMetadata, error) { + return &cloudprovider.InstanceMetadata{ + ProviderID: providerid.FromCloudServerID(s.ID), + InstanceType: s.ServerType.Name, + NodeAddresses: hcloudNodeAddresses(addressFamily, networkID, s.Server), + Zone: s.Datacenter.Name, + Region: s.Datacenter.Location.Name, + }, nil +} + +type robotServer struct { + *hrobotmodels.Server + robotClient robot.Client +} + +func (s robotServer) IsShutdown() (bool, error) { + resetStatus, err := s.robotClient.ResetGet(s.ServerNumber) + if err != nil { + return false, err + } + + // OperationStatus is not supported for server models using the tower case, in that case the value is "not supported" + // When the server is powered off, the OperatingStatus is "shut off" + return resetStatus.OperatingStatus == "shut off", nil +} + +func (s robotServer) Metadata(addressFamily config.AddressFamily, _ int64) (*cloudprovider.InstanceMetadata, error) { + return &cloudprovider.InstanceMetadata{ + ProviderID: providerid.FromRobotServerNumber(s.ServerNumber), + InstanceType: getInstanceTypeOfRobotServer(s.Server), + NodeAddresses: robotNodeAddresses(addressFamily, s.Server), + Zone: getZoneOfRobotServer(s.Server), + Region: getRegionOfRobotServer(s.Server), + }, nil +} diff 
--git a/hcloud/instances_test.go b/hcloud/instances_test.go index fa3c717fc9bb45011506fbe7709c1885beced990..0631c86c8f8b861577cd3d59bee5470b6829aa48 100644 --- a/hcloud/instances_test.go +++ b/hcloud/instances_test.go @@ -24,6 +24,7 @@ import ( "reflect" "testing" + hrobotmodels "github.com/syself/hrobot-go/models" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" cloudprovider "k8s.io/cloud-provider" @@ -58,8 +59,37 @@ func TestInstances_InstanceExists(t *testing.T) { } json.NewEncoder(w).Encode(schema.ServerListResponse{Servers: servers}) }) + env.Mux.HandleFunc("/robot/server/321", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(hrobotmodels.ServerResponse{ + Server: hrobotmodels.Server{ + ServerIP: "233.252.0.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 321, + Name: "robot-server1", + }, + }) + }) - instances := newInstances(env.Client, config.AddressFamilyIPv4, 0) + env.Mux.HandleFunc("/robot/server/322", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + json.NewEncoder(w).Encode(schema.ErrorResponse{Error: schema.Error{Code: string(hrobotmodels.ErrorCodeServerNotFound)}}) + }) + + env.Mux.HandleFunc("/robot/server", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode([]hrobotmodels.ServerResponse{ + { + Server: hrobotmodels.Server{ + ServerIP: "233.252.0.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 321, + Name: "robot-server1", + }, + }, + }) + }) + + instances := newInstances(env.Client, env.RobotClient, config.AddressFamilyIPv4, 0) tests := []struct { name string @@ -72,12 +102,43 @@ func TestInstances_InstanceExists(t *testing.T) { Spec: corev1.NodeSpec{ProviderID: "hcloud://1"}, }, expected: true, + }, { + name: "existing robot server by id", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "robot-server1", + }, + Spec: corev1.NodeSpec{ProviderID: 
"hrobot://321"}, + }, + expected: true, + }, + { + name: "existing robot server by (legacy) id", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "robot-server1", + }, + Spec: corev1.NodeSpec{ProviderID: "hcloud://bm-321"}, + }, + expected: true, }, { name: "missing server by id", node: &corev1.Node{ Spec: corev1.NodeSpec{ProviderID: "hcloud://2"}, }, expected: false, + }, { + name: "missing robot server by id", + node: &corev1.Node{ + Spec: corev1.NodeSpec{ProviderID: "hrobot://322"}, + }, + expected: false, + }, { + name: "missing robot server by (legacy) id", + node: &corev1.Node{ + Spec: corev1.NodeSpec{ProviderID: "hcloud://bm-322"}, + }, + expected: false, }, { name: "existing server by name", node: &corev1.Node{ @@ -86,6 +147,14 @@ func TestInstances_InstanceExists(t *testing.T) { }, }, expected: true, + }, { + name: "existing robot server by name", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "robot-server1", + }, + }, + expected: true, }, { name: "missing server by name", node: &corev1.Node{ @@ -94,6 +163,14 @@ func TestInstances_InstanceExists(t *testing.T) { }, }, expected: false, + }, { + name: "missing robot server by name", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "robot-barfoo", + }, + }, + expected: false, }, } @@ -132,7 +209,57 @@ func TestInstances_InstanceShutdown(t *testing.T) { }) }) - instances := newInstances(env.Client, config.AddressFamilyIPv4, 0) + instances := newInstances(env.Client, env.RobotClient, config.AddressFamilyIPv4, 0) + env.Mux.HandleFunc("/robot/server/3", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(hrobotmodels.ServerResponse{ + Server: hrobotmodels.Server{ + ServerIP: "233.252.0.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 3, + Name: "robot-server3", + }, + }) + }) + + env.Mux.HandleFunc("/robot/server/4", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(hrobotmodels.ServerResponse{ + Server: 
hrobotmodels.Server{ + ServerIP: "233.252.0.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 4, + Name: "robot-server4", + }, + }) + }) + + env.Mux.HandleFunc("/robot/server/5", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(hrobotmodels.ServerResponse{ + Server: hrobotmodels.Server{ + ServerIP: "233.252.0.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 5, + Name: "robot-server5", + }, + }) + }) + + env.Mux.HandleFunc("/robot/reset/3", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(hrobotmodels.ResetResponse{Reset: hrobotmodels.Reset{ + OperatingStatus: "running", + }}) + }) + + env.Mux.HandleFunc("/robot/reset/4", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(hrobotmodels.ResetResponse{Reset: hrobotmodels.Reset{ + OperatingStatus: "shut down", + }}) + }) + + env.Mux.HandleFunc("/robot/reset/5", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(hrobotmodels.ResetResponse{Reset: hrobotmodels.Reset{ + OperatingStatus: "not supported", + }}) + }) tests := []struct { name string @@ -140,20 +267,47 @@ func TestInstances_InstanceShutdown(t *testing.T) { expected bool }{ { - name: "running server", + name: "[cloud] running", node: &corev1.Node{ Spec: corev1.NodeSpec{ProviderID: "hcloud://1"}, }, expected: false, }, { - name: "shutdown server", + name: "[cloud] shutdown", node: &corev1.Node{ Spec: corev1.NodeSpec{ProviderID: "hcloud://2"}, }, expected: true, + }, { + name: "[robot] running", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "robot-server3", + }, + Spec: corev1.NodeSpec{ProviderID: "hrobot://3"}, + }, + expected: false, + }, { + name: "[robot] shutdown", + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "robot-server4", + }, + Spec: corev1.NodeSpec{ProviderID: "hrobot://4"}, + }, + expected: false, + }, + { + name: "[robot] status unavailable", + node: &corev1.Node{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "robot-server5", + }, + Spec: corev1.NodeSpec{ProviderID: "hrobot://5"}, + }, + expected: false, }, } - for _, test := range tests { t.Run(test.name, func(t *testing.T) { exists, err := instances.InstanceShutdown(context.TODO(), test.node) @@ -189,7 +343,7 @@ func TestInstances_InstanceMetadata(t *testing.T) { }) }) - instances := newInstances(env.Client, config.AddressFamilyIPv4, 0) + instances := newInstances(env.Client, env.RobotClient, config.AddressFamilyIPv4, 0) metadata, err := instances.InstanceMetadata(context.TODO(), &corev1.Node{ Spec: corev1.NodeSpec{ProviderID: "hcloud://1"}, @@ -214,6 +368,50 @@ func TestInstances_InstanceMetadata(t *testing.T) { } } +func TestInstances_InstanceMetadataRobotServer(t *testing.T) { + env := newTestEnv() + defer env.Teardown() + env.Mux.HandleFunc("/robot/server/321", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(hrobotmodels.ServerResponse{ + Server: hrobotmodels.Server{ + ServerIP: "233.252.0.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 321, + Product: "robot-product 1", + Name: "robot-server1", + Dc: "NBG1-DC1", + }, + }) + }) + + instances := newInstances(env.Client, env.RobotClient, config.AddressFamilyIPv4, 0) + + metadata, err := instances.InstanceMetadata(context.TODO(), &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "robot-server1", + }, + Spec: corev1.NodeSpec{ProviderID: "hrobot://321"}, + }) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + expectedMetadata := &cloudprovider.InstanceMetadata{ + ProviderID: "hrobot://321", + InstanceType: "robot-product-1", + NodeAddresses: []corev1.NodeAddress{ + {Type: corev1.NodeHostName, Address: "robot-server1"}, + {Type: corev1.NodeExternalIP, Address: "233.252.0.123"}, + }, + Zone: "nbg1-dc1", + Region: "nbg1", + } + + if !reflect.DeepEqual(metadata, expectedMetadata) { + t.Fatalf("Expected metadata %+v but got %+v", *expectedMetadata, *metadata) + } +} + func 
TestNodeAddresses(t *testing.T) { tests := []struct { name string @@ -377,7 +575,7 @@ func TestNodeAddresses(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - addresses := nodeAddresses(test.addressFamily, test.privateNetwork, test.server) + addresses := hcloudNodeAddresses(test.addressFamily, test.privateNetwork, test.server) if !reflect.DeepEqual(addresses, test.expected) { t.Fatalf("Expected addresses %+v but got %+v", test.expected, addresses) @@ -385,3 +583,64 @@ func TestNodeAddresses(t *testing.T) { }) } } + +func TestNodeAddressesRobotServer(t *testing.T) { + tests := []struct { + name string + addressFamily config.AddressFamily + server *hrobotmodels.Server + privateNetwork int + expected []corev1.NodeAddress + }{ + { + name: "public ipv4", + addressFamily: config.AddressFamilyIPv4, + server: &hrobotmodels.Server{ + Name: "foobar", + ServerIP: "203.0.113.7", + ServerIPv6Net: "2001:db8:1234::", + }, + expected: []corev1.NodeAddress{ + {Type: corev1.NodeHostName, Address: "foobar"}, + {Type: corev1.NodeExternalIP, Address: "203.0.113.7"}, + }, + }, + { + name: "public ipv6", + addressFamily: config.AddressFamilyIPv6, + server: &hrobotmodels.Server{ + Name: "foobar", + ServerIP: "203.0.113.7", + ServerIPv6Net: "2001:db8:1234::", + }, + expected: []corev1.NodeAddress{ + {Type: corev1.NodeHostName, Address: "foobar"}, + {Type: corev1.NodeExternalIP, Address: "2001:db8:1234::1"}, + }, + }, + { + name: "public dual stack", + addressFamily: config.AddressFamilyDualStack, + server: &hrobotmodels.Server{ + Name: "foobar", + ServerIP: "203.0.113.7", + ServerIPv6Net: "2001:db8:1234::", + }, + expected: []corev1.NodeAddress{ + {Type: corev1.NodeHostName, Address: "foobar"}, + {Type: corev1.NodeExternalIP, Address: "2001:db8:1234::1"}, + {Type: corev1.NodeExternalIP, Address: "203.0.113.7"}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + addresses := robotNodeAddresses(test.addressFamily, 
test.server) + + if !reflect.DeepEqual(addresses, test.expected) { + t.Fatalf("%s: expected addresses %+v but got %+v", test.name, test.expected, addresses) + } + }) + } +} diff --git a/hcloud/instances_util.go b/hcloud/instances_util.go new file mode 100644 index 0000000000000000000000000000000000000000..2a4136fe9e8656148cf3205b8c47b7225f867074 --- /dev/null +++ b/hcloud/instances_util.go @@ -0,0 +1,117 @@ +/* +Copyright 2018 Hetzner Cloud GmbH. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package hcloud + +import ( + "context" + "fmt" + "strings" + + hrobotmodels "github.com/syself/hrobot-go/models" + corev1 "k8s.io/api/core/v1" + + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics" + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/robot" + "github.com/hetznercloud/hcloud-go/v2/hcloud" +) + +func getCloudServerByName(ctx context.Context, c *hcloud.Client, name string) (*hcloud.Server, error) { + const op = "hcloud/getCloudServerByName" + metrics.OperationCalled.WithLabelValues(op).Inc() + + server, _, err := c.Server.GetByName(ctx, name) + if err != nil { + return nil, fmt.Errorf("%s: %w", op, err) + } + + return server, nil +} + +func getCloudServerByID(ctx context.Context, c *hcloud.Client, id int64) (*hcloud.Server, error) { + const op = "hcloud/getCloudServerByID" + metrics.OperationCalled.WithLabelValues(op).Inc() + + server, _, err := c.Server.GetByID(ctx, id) + if err != nil { + return nil, fmt.Errorf("%s: %w", op, err) + } + 
return server, nil +} + +func getRobotServerByName(c robot.Client, node *corev1.Node) (server *hrobotmodels.Server, err error) { + const op = "hcloud/getRobotServerByName" + + if c == nil { + return nil, errMissingRobotClient + } + + serverList, err := c.ServerGetList() + if err != nil { + return nil, fmt.Errorf("%s: %w", op, err) + } + + for i, s := range serverList { + if s.Name == node.Name { + server = &serverList[i] + } + } + + return server, nil +} + +func getRobotServerByID(c robot.Client, id int, node *corev1.Node) (*hrobotmodels.Server, error) { + const op = "hcloud/getRobotServerByID" + + if c == nil { + return nil, errMissingRobotClient + } + + server, err := c.ServerGet(id) + if err != nil && !hrobotmodels.IsError(err, hrobotmodels.ErrorCodeServerNotFound) { + return nil, fmt.Errorf("%s: %w", op, err) + } + + if server == nil { + return nil, nil + } + + // check whether name matches - otherwise this server does not belong to the respective node anymore + if server.Name != node.Name { + return nil, nil + } + + // return nil, nil if server could not be found + return server, nil +} + +func getInstanceTypeOfRobotServer(server *hrobotmodels.Server) string { + if server == nil { + panic("getInstanceTypeOfRobotServer called with nil server") + } + return strings.ReplaceAll(server.Product, " ", "-") +} + +func getZoneOfRobotServer(server *hrobotmodels.Server) string { + return strings.ToLower(server.Dc) +} + +func getRegionOfRobotServer(server *hrobotmodels.Server) string { + zone := getZoneOfRobotServer(server) + // zone is a Hetzner DC, e.g. "hel1-dc2" + // the cloud location is equal to the first part of the zone, e.g. "hel1" and that is was has historically been used in the Region label. 
+ return strings.Split(zone, "-")[0] +} diff --git a/internal/config/config.go b/internal/config/config.go index bb972e079718bb04b9000080287fc972050e0c29..dacfab4fef708e493be7d9964f4e967c7d3a8ddb 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "strconv" + "time" ) const ( @@ -13,6 +14,11 @@ const ( hcloudNetwork = "HCLOUD_NETWORK" hcloudDebug = "HCLOUD_DEBUG" + robotEnabled = "ROBOT_ENABLED" + robotUser = "ROBOT_USER" + robotPassword = "ROBOT_PASSWORD" + robotCacheTimeout = "ROBOT_CACHE_TIMEOUT" + hcloudInstancesAddressFamily = "HCLOUD_INSTANCES_ADDRESS_FAMILY" // Disable the "master/server is attached to the network" check against the metadata service. @@ -36,6 +42,13 @@ type HCloudClientConfiguration struct { Debug bool } +type RobotConfiguration struct { + Enabled bool + User string + Password string + CacheTimeout time.Duration +} + type MetricsConfiguration struct { Enabled bool Address string @@ -73,6 +86,7 @@ type RouteConfiguration struct { type HCCMConfiguration struct { HCloudClient HCloudClientConfiguration + Robot RobotConfiguration Metrics MetricsConfiguration Instance InstanceConfiguration LoadBalancer LoadBalancerConfiguration @@ -97,6 +111,20 @@ func Read() (HCCMConfiguration, error) { errs = append(errs, err) } + cfg.Robot.Enabled, err = getEnvBool(robotEnabled, false) + if err != nil { + errs = append(errs, err) + } + cfg.Robot.User = os.Getenv(robotUser) + cfg.Robot.Password = os.Getenv(robotPassword) + cfg.Robot.CacheTimeout, err = getEnvDuration(robotCacheTimeout) + if err != nil { + errs = append(errs, err) + } + if cfg.Robot.CacheTimeout == 0 { + cfg.Robot.CacheTimeout = 5 * time.Minute + } + cfg.Metrics.Enabled, err = getEnvBool(hcloudMetricsEnabled, true) if err != nil { errs = append(errs, err) @@ -168,6 +196,19 @@ func (c HCCMConfiguration) Validate() (err error) { errs = append(errs, fmt.Errorf("invalid value for %q/%q, only one of them can be set", hcloudLoadBalancersLocation, 
hcloudLoadBalancersNetworkZone)) } + if c.Robot.Enabled { + if c.Robot.User == "" { + errs = append(errs, fmt.Errorf("environment variable %q is required if Robot support is enabled", robotUser)) + } + if c.Robot.Password == "" { + errs = append(errs, fmt.Errorf("environment variable %q is required if Robot support is enabled", robotPassword)) + } + + if c.Route.Enabled { + errs = append(errs, fmt.Errorf("using Routes with Robot is not supported")) + } + } + if len(errs) > 0 { return errors.Join(errs...) } @@ -189,3 +230,19 @@ func getEnvBool(key string, defaultValue bool) (bool, error) { return b, nil } + +// getEnvDuration returns the duration parsed from the environment variable with the given key and a potential error +// parsing the var. Returns false if the env var is unset. +func getEnvDuration(key string) (time.Duration, error) { + v := os.Getenv(key) + if v == "" { + return 0, nil + } + + b, err := time.ParseDuration(v) + if err != nil { + return 0, fmt.Errorf("failed to parse %s: %v", key, err) + } + + return b, nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index c0994656033532c595cc538cd90f0829211c3caf..860e4647c9f417a05835d58f810edb35d7854823 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -3,6 +3,7 @@ package config import ( "errors" "testing" + "time" "github.com/stretchr/testify/assert" @@ -20,6 +21,7 @@ func TestRead(t *testing.T) { name: "minimal", env: []string{}, want: HCCMConfiguration{ + Robot: RobotConfiguration{CacheTimeout: 5 * time.Minute}, Metrics: MetricsConfiguration{Enabled: true, Address: ":8233"}, Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, LoadBalancer: LoadBalancerConfiguration{Enabled: true}, @@ -35,6 +37,7 @@ func TestRead(t *testing.T) { }, want: HCCMConfiguration{ HCloudClient: HCloudClientConfiguration{Token: "jr5g7ZHpPptyhJzZyHw2Pqu4g9gTqDvEceYpngPf79jN_NOT_VALID_dzhepnahq"}, + Robot: RobotConfiguration{CacheTimeout: 5 * 
time.Minute}, Metrics: MetricsConfiguration{Enabled: true, Address: ":8233"}, Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, Network: NetworkConfiguration{ @@ -58,6 +61,28 @@ func TestRead(t *testing.T) { Endpoint: "https://api.example.com", Debug: true, }, + Robot: RobotConfiguration{CacheTimeout: 5 * time.Minute}, + Metrics: MetricsConfiguration{Enabled: true, Address: ":8233"}, + Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, + LoadBalancer: LoadBalancerConfiguration{Enabled: true}, + }, + wantErr: nil, + }, + { + name: "robot", + env: []string{ + "ROBOT_ENABLED", "true", + "ROBOT_USER", "foobar", + "ROBOT_PASSWORD", "secret-password", + "ROBOT_CACHE_TIMEOUT", "1m", + }, + want: HCCMConfiguration{ + Robot: RobotConfiguration{ + Enabled: true, + User: "foobar", + Password: "secret-password", + CacheTimeout: 1 * time.Minute, + }, Metrics: MetricsConfiguration{Enabled: true, Address: ":8233"}, Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, LoadBalancer: LoadBalancerConfiguration{Enabled: true}, @@ -70,6 +95,7 @@ func TestRead(t *testing.T) { "HCLOUD_INSTANCES_ADDRESS_FAMILY", "ipv6", }, want: HCCMConfiguration{ + Robot: RobotConfiguration{CacheTimeout: 5 * time.Minute}, Metrics: MetricsConfiguration{Enabled: true, Address: ":8233"}, Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv6}, LoadBalancer: LoadBalancerConfiguration{Enabled: true}, @@ -83,6 +109,7 @@ func TestRead(t *testing.T) { "HCLOUD_NETWORK", "foobar", }, want: HCCMConfiguration{ + Robot: RobotConfiguration{CacheTimeout: 5 * time.Minute}, Metrics: MetricsConfiguration{Enabled: true, Address: ":8233"}, Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, LoadBalancer: LoadBalancerConfiguration{Enabled: true}, @@ -101,6 +128,7 @@ func TestRead(t *testing.T) { "HCLOUD_NETWORK_ROUTES_ENABLED", "false", }, want: HCCMConfiguration{ + Robot: RobotConfiguration{CacheTimeout: 5 * time.Minute}, Metrics: 
MetricsConfiguration{Enabled: true, Address: ":8233"}, Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, LoadBalancer: LoadBalancerConfiguration{Enabled: true}, @@ -122,6 +150,7 @@ func TestRead(t *testing.T) { "HCLOUD_LOAD_BALANCERS_DISABLE_IPV6", "true", }, want: HCCMConfiguration{ + Robot: RobotConfiguration{CacheTimeout: 5 * time.Minute}, Metrics: MetricsConfiguration{Enabled: true, Address: ":8233"}, Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, LoadBalancer: LoadBalancerConfiguration{ @@ -141,6 +170,7 @@ func TestRead(t *testing.T) { // Required to parse HCLOUD_NETWORK_ROUTES_ENABLED "HCLOUD_NETWORK", "foobar", + "ROBOT_ENABLED", "no", "HCLOUD_DEBUG", "foo", "HCLOUD_METRICS_ENABLED", "bar", "HCLOUD_LOAD_BALANCERS_ENABLED", "nej", @@ -151,6 +181,7 @@ func TestRead(t *testing.T) { "HCLOUD_NETWORK_ROUTES_ENABLED", "si", }, wantErr: errors.New(`failed to parse HCLOUD_DEBUG: strconv.ParseBool: parsing "foo": invalid syntax +failed to parse ROBOT_ENABLED: strconv.ParseBool: parsing "no": invalid syntax failed to parse HCLOUD_METRICS_ENABLED: strconv.ParseBool: parsing "bar": invalid syntax failed to parse HCLOUD_LOAD_BALANCERS_ENABLED: strconv.ParseBool: parsing "nej": invalid syntax failed to parse HCLOUD_LOAD_BALANCERS_DISABLE_PRIVATE_INGRESS: strconv.ParseBool: parsing "nyet": invalid syntax @@ -159,6 +190,13 @@ failed to parse HCLOUD_LOAD_BALANCERS_DISABLE_IPV6: strconv.ParseBool: parsing " failed to parse HCLOUD_NETWORK_DISABLE_ATTACHED_CHECK: strconv.ParseBool: parsing "oui": invalid syntax failed to parse HCLOUD_NETWORK_ROUTES_ENABLED: strconv.ParseBool: parsing "si": invalid syntax`), }, + { + name: "error parsing duration values", + env: []string{ + "ROBOT_CACHE_TIMEOUT", "biweekly", + }, + wantErr: errors.New(`failed to parse ROBOT_CACHE_TIMEOUT: time: invalid duration "biweekly"`), + }, } for _, tt := range tests { @@ -180,6 +218,7 @@ failed to parse HCLOUD_NETWORK_ROUTES_ENABLED: strconv.ParseBool: parsing "si": 
func TestHCCMConfiguration_Validate(t *testing.T) { type fields struct { HCloudClient HCloudClientConfiguration + Robot RobotConfiguration Metrics MetricsConfiguration Instance InstanceConfiguration LoadBalancer LoadBalancerConfiguration @@ -251,11 +290,41 @@ func TestHCCMConfiguration_Validate(t *testing.T) { }, wantErr: errors.New("invalid value for \"HCLOUD_LOAD_BALANCERS_LOCATION\"/\"HCLOUD_LOAD_BALANCERS_NETWORK_ZONE\", only one of them can be set"), }, + { + name: "robot enabled but missing credentials", + fields: fields{ + HCloudClient: HCloudClientConfiguration{Token: "jr5g7ZHpPptyhJzZyHw2Pqu4g9gTqDvEceYpngPf79jN_NOT_VALID_dzhepnahq"}, + Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, + + Robot: RobotConfiguration{ + Enabled: true, + }, + }, + wantErr: errors.New(`environment variable "ROBOT_USER" is required if Robot support is enabled +environment variable "ROBOT_PASSWORD" is required if Robot support is enabled`), + }, + { + name: "robot & routes activated", + fields: fields{ + + HCloudClient: HCloudClientConfiguration{Token: "jr5g7ZHpPptyhJzZyHw2Pqu4g9gTqDvEceYpngPf79jN_NOT_VALID_dzhepnahq"}, + Instance: InstanceConfiguration{AddressFamily: AddressFamilyIPv4}, + Route: RouteConfiguration{Enabled: true}, + Robot: RobotConfiguration{ + Enabled: true, + + User: "foo", + Password: "bar", + }, + }, + wantErr: errors.New("using Routes with Robot is not supported"), + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { c := HCCMConfiguration{ HCloudClient: tt.fields.HCloudClient, + Robot: tt.fields.Robot, Metrics: tt.fields.Metrics, Instance: tt.fields.Instance, LoadBalancer: tt.fields.LoadBalancer, diff --git a/internal/hcops/load_balancer.go b/internal/hcops/load_balancer.go index c1c4ca4f5fea78a558fe64289f2b9288e91fbe18..526b4b8b28bb781fc5bab62435bd43e984c1156a 100644 --- a/internal/hcops/load_balancer.go +++ b/internal/hcops/load_balancer.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "net" "sync" "time" @@ -14,6 
+15,7 @@ import ( "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/config" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/providerid" + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/robot" "github.com/hetznercloud/hcloud-go/v2/hcloud" ) @@ -50,6 +52,9 @@ type HCloudLoadBalancerClient interface { AddServerTarget(ctx context.Context, lb *hcloud.LoadBalancer, opts hcloud.LoadBalancerAddServerTargetOpts) (*hcloud.Action, *hcloud.Response, error) RemoveServerTarget(ctx context.Context, lb *hcloud.LoadBalancer, server *hcloud.Server) (*hcloud.Action, *hcloud.Response, error) + AddIPTarget(ctx context.Context, lb *hcloud.LoadBalancer, opts hcloud.LoadBalancerAddIPTargetOpts) (*hcloud.Action, *hcloud.Response, error) + RemoveIPTarget(ctx context.Context, lb *hcloud.LoadBalancer, server net.IP) (*hcloud.Action, *hcloud.Response, error) + AttachToNetwork(ctx context.Context, lb *hcloud.LoadBalancer, opts hcloud.LoadBalancerAttachToNetworkOpts) (*hcloud.Action, *hcloud.Response, error) DetachFromNetwork(ctx context.Context, lb *hcloud.LoadBalancer, opts hcloud.LoadBalancerDetachFromNetworkOpts) (*hcloud.Action, *hcloud.Response, error) @@ -68,6 +73,7 @@ type LoadBalancerOps struct { LBClient HCloudLoadBalancerClient ActionClient HCloudActionClient NetworkClient HCloudNetworkClient + RobotClient robot.Client CertOps *CertificateOps RetryDelay time.Duration NetworkID int64 @@ -363,7 +369,6 @@ func (l *LoadBalancerOps) changeIPv4RDNS(ctx context.Context, lb *hcloud.LoadBal } action, _, err := l.LBClient.ChangeDNSPtr(ctx, lb, lb.PublicNet.IPv4.IP.String(), &rdns) - if err != nil { return false, fmt.Errorf("%s: %w", op, err) } @@ -389,7 +394,6 @@ func (l *LoadBalancerOps) changeIPv6RDNS(ctx context.Context, lb *hcloud.LoadBal } action, _, err := l.LBClient.ChangeDNSPtr(ctx, lb, lb.PublicNet.IPv6.IP.String(), &rdns) - if err != nil { return false, 
fmt.Errorf("%s: %w", op, err) } @@ -565,15 +569,24 @@ func (l *LoadBalancerOps) ReconcileHCLBTargets( var ( // Set of all K8S server IDs currently assigned as nodes to this // cluster. - k8sNodeIDs = make(map[int64]bool) - k8sNodeNames = make(map[int64]string) + k8sNodeIDsHCloud = make(map[int64]bool) + k8sNodeIDsRobot = make(map[int]bool) + k8sNodeNames = make(map[int64]string) + robotIPsToIDs = make(map[string]int) + robotIDToIPv4 = make(map[int]string) // Set of server IDs assigned as targets to the HC Load Balancer. Some // of the entries may get deleted during reconcilement. In this case // the hclbTargetIDs[id] is always false. If hclbTargetIDs[id] is true, // the node with this server id is assigned to the K8S cluster. hclbTargetIDs = make(map[int64]bool) + // Set of server IPs assigned as targets to the HC Load Balancer. Some + // of the entries may get deleted during reconcilement. In this case + // the hclbTargetIPs[ip] is always false. If hclbTargetIPs[ip] is true, + // the node with this server IP is assigned to the K8S cluster. + hclbTargetIPs = make(map[string]bool) + changed bool ) @@ -587,45 +600,96 @@ func (l *LoadBalancerOps) ReconcileHCLBTargets( // Extract HC server IDs of all K8S nodes assigned to the K8S cluster. 
for _, node := range nodes { - id, err := providerid.ToServerID(node.Spec.ProviderID) + id, isCloudServer, err := providerid.ToServerID(node.Spec.ProviderID) if err != nil { return changed, fmt.Errorf("%s: %w", op, err) } - k8sNodeIDs[id] = true + if isCloudServer { + k8sNodeIDsHCloud[id] = true + } else { + k8sNodeIDsRobot[int(id)] = true + } k8sNodeNames[id] = node.Name } + // List all robot servers to check whether the ip targets of the load balancer + // correspond to a dedicated server + + if l.Cfg.Robot.Enabled { + dedicatedServers, err := l.RobotClient.ServerGetList() + if err != nil { + return changed, fmt.Errorf("%s: failed to get list of dedicated servers: %w", op, err) + } + + for _, s := range dedicatedServers { + robotIPsToIDs[s.ServerIP] = s.ServerNumber + robotIDToIPv4[s.ServerNumber] = s.ServerIP + } + } + // Extract IDs of the hc Load Balancer's server targets. Along the way, // Remove all server targets from the HC Load Balancer which are currently // not assigned as nodes to the K8S Load Balancer. for _, target := range lb.Targets { - if target.Type != hcloud.LoadBalancerTargetTypeServer { - continue - } + if target.Type == hcloud.LoadBalancerTargetTypeServer { + id := target.Server.Server.ID + recreate := target.UsePrivateIP != usePrivateIP + hclbTargetIDs[id] = k8sNodeIDsHCloud[id] && !recreate + if hclbTargetIDs[id] { + continue + } - id := target.Server.Server.ID - recreate := target.UsePrivateIP != usePrivateIP - hclbTargetIDs[id] = k8sNodeIDs[id] && !recreate - if hclbTargetIDs[id] { - continue + klog.InfoS("remove target", "op", op, "service", svc.ObjectMeta.Name, "targetName", k8sNodeNames[id]) + // Target needs to be re-created or node currently not in use by k8s + // Load Balancer. 
Remove it from the HC Load Balancer + a, _, err := l.LBClient.RemoveServerTarget(ctx, lb, target.Server.Server) + if err != nil { + return changed, fmt.Errorf("%s: target: %s: %w", op, k8sNodeNames[id], err) + } + if err := WatchAction(ctx, l.ActionClient, a); err != nil { + return changed, fmt.Errorf("%s: target: %s: %w", op, k8sNodeNames[id], err) + } + changed = true } - klog.InfoS("remove target", "op", op, "service", svc.ObjectMeta.Name, "targetName", k8sNodeNames[id]) - // Target needs to be re-created or node currently not in use by k8s - // Load Balancer. Remove it from the HC Load Balancer - a, _, err := l.LBClient.RemoveServerTarget(ctx, lb, target.Server.Server) - if err != nil { - return changed, fmt.Errorf("%s: target: %s: %w", op, k8sNodeNames[id], err) - } - if err := WatchAction(ctx, l.ActionClient, a); err != nil { - return changed, fmt.Errorf("%s: target: %s: %w", op, k8sNodeNames[id], err) + // Cleanup of IP Targets happens whether Robot Support is enabled or not. + // If it is not enabled, we remove all IP targets. + if target.Type == hcloud.LoadBalancerTargetTypeIP { + ip := target.IP.IP + id, foundServer := robotIPsToIDs[ip] + hclbTargetIPs[ip] = foundServer && k8sNodeIDsRobot[id] + if hclbTargetIPs[ip] { + continue + } + + klog.InfoS("remove target", "op", op, "service", svc.ObjectMeta.Name, "targetName", k8sNodeNames[int64(id)]) + // Node currently not in use by k8s Load Balancer. Remove it from the HC Load Balancer. 
+ a, _, err := l.LBClient.RemoveIPTarget(ctx, lb, net.ParseIP(ip)) + if err != nil { + var e error + if foundServer { + e = fmt.Errorf("%s: target: %s: %w", op, k8sNodeNames[int64(id)], err) + } else { + e = fmt.Errorf("%s: targetIP: %s: %w", op, ip, err) + } + return changed, e + } + if err := WatchAction(ctx, l.ActionClient, a); err != nil { + var e error + if foundServer { + e = fmt.Errorf("%s: target: %s: %w", op, k8sNodeNames[int64(id)], err) + } else { + e = fmt.Errorf("%s: targetIP: %s: %w", op, ip, err) + } + return changed, e + } + changed = true } - changed = true } // Assign the servers which are currently assigned as nodes // to the K8S Load Balancer as server targets to the HC Load Balancer. - for id := range k8sNodeIDs { + for id := range k8sNodeIDsHCloud { // Don't assign the node again if it is already assigned to the HC load // balancer. if hclbTargetIDs[id] { @@ -651,6 +715,41 @@ func (l *LoadBalancerOps) ReconcileHCLBTargets( changed = true } + if l.Cfg.Robot.Enabled { + // Assign the dedicated servers which are currently assigned as nodes + // to the K8S Load Balancer as IP targets to the HC Load Balancer. + for id := range k8sNodeIDsRobot { + ip := robotIDToIPv4[id] + + // Don't assign the node again if it is already assigned to the HC load + // balancer. 
+ if hclbTargetIPs[ip] { + continue + } + if ip == "" { + klog.InfoS("k8s node found but no corresponding server in robot", "id", id) + continue + } + + klog.InfoS("add target", "op", op, "service", svc.ObjectMeta.Name, "targetName", k8sNodeNames[int64(id)], "ip", ip) + opts := hcloud.LoadBalancerAddIPTargetOpts{ + IP: net.ParseIP(ip), + } + a, _, err := l.LBClient.AddIPTarget(ctx, lb, opts) + if err != nil { + if hcloud.IsError(err, hcloud.ErrorCodeResourceLimitExceeded) { + klog.InfoS("resource limit exceeded", "err", err.Error(), "op", op, "service", svc.ObjectMeta.Name, "targetName", k8sNodeNames[int64(id)]) + return false, nil + } + return changed, fmt.Errorf("%s: target %s: %w", op, k8sNodeNames[int64(id)], err) + } + if err := WatchAction(ctx, l.ActionClient, a); err != nil { + return changed, fmt.Errorf("%s: target %s: %w", op, k8sNodeNames[int64(id)], err) + } + changed = true + } + } + return changed, nil } diff --git a/internal/hcops/load_balancer_test.go b/internal/hcops/load_balancer_test.go index fa579f7bfbe2906d9963b208caaa30128110537e..227e5b76209eac2e0a04d83623df75c3285a9707 100644 --- a/internal/hcops/load_balancer_test.go +++ b/internal/hcops/load_balancer_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + hrobotmodels "github.com/syself/hrobot-go/models" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -608,6 +609,7 @@ type LBReconcilementTestCase struct { servicePorts []corev1.ServicePort k8sNodes []*corev1.Node initialLB *hcloud.LoadBalancer + robotServers []hrobotmodels.Server mock func(t *testing.T, tt *LBReconcilementTestCase) perform func(t *testing.T, tt *LBReconcilementTestCase) @@ -1140,10 +1142,26 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { k8sNodes: []*corev1.Node{ {Spec: corev1.NodeSpec{ProviderID: "hcloud://1"}}, {Spec: corev1.NodeSpec{ProviderID: "hcloud://2"}}, + {Spec: 
corev1.NodeSpec{ProviderID: "hrobot://3"}, ObjectMeta: metav1.ObjectMeta{Name: "robot-3"}}, + {Spec: corev1.NodeSpec{ProviderID: "hrobot://4"}, ObjectMeta: metav1.ObjectMeta{Name: "robot-4"}}, }, initialLB: &hcloud.LoadBalancer{ ID: 1, }, + robotServers: []hrobotmodels.Server{ + { + ServerNumber: 3, + Name: "robot-3", + ServerIP: "1.2.3.4", + ServerIPv6Net: "2a01:f48:111:4221::", + }, + { + ServerNumber: 4, + Name: "robot-4", + ServerIP: "1.2.3.5", + ServerIPv6Net: "2a01:f48:111:4222::", + }, + }, mock: func(t *testing.T, tt *LBReconcilementTestCase) { opts := hcloud.LoadBalancerAddServerTargetOpts{Server: &hcloud.Server{ID: 1}, UsePrivateIP: hcloud.Ptr(false)} action := tt.fx.MockAddServerTarget(tt.initialLB, opts, nil) @@ -1152,12 +1170,26 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { opts = hcloud.LoadBalancerAddServerTargetOpts{Server: &hcloud.Server{ID: 2}, UsePrivateIP: hcloud.Ptr(false)} action = tt.fx.MockAddServerTarget(tt.initialLB, opts, nil) tt.fx.MockWatchProgress(action, nil) + + optsIP := hcloud.LoadBalancerAddIPTargetOpts{IP: net.ParseIP("1.2.3.4")} + action = tt.fx.MockAddIPTarget(tt.initialLB, optsIP, nil) + tt.fx.MockWatchProgress(action, nil) + + optsIP = hcloud.LoadBalancerAddIPTargetOpts{IP: net.ParseIP("1.2.3.5")} + action = tt.fx.MockAddIPTarget(tt.initialLB, optsIP, nil) + tt.fx.MockWatchProgress(action, nil) + + tt.fx.MockListRobotServers(tt.robotServers, nil) }, perform: func(t *testing.T, tt *LBReconcilementTestCase) { changed, err := tt.fx.LBOps.ReconcileHCLBTargets(tt.fx.Ctx, tt.initialLB, tt.service, tt.k8sNodes) assert.NoError(t, err) assert.True(t, changed) }, + cfg: config.HCCMConfiguration{ + LoadBalancer: config.LoadBalancerConfiguration{DisableIPv6: false}, + Robot: config.RobotConfiguration{Enabled: true}, + }, }, { name: "remove unused k8s nodes from hc Load Balancer", @@ -1184,6 +1216,22 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { Type: hcloud.LoadBalancerTargetTypeServer, Server: 
&hcloud.LoadBalancerTargetServer{Server: &hcloud.Server{ID: 4}}, }, + { + Type: hcloud.LoadBalancerTargetTypeIP, + IP: &hcloud.LoadBalancerTargetIP{IP: "1.2.3.4"}, + }, + }, + }, + robotServers: []hrobotmodels.Server{ + { + ServerNumber: 5, + ServerIP: "1.2.3.4", + ServerIPv6Net: "2a01:f48:111:4221::", + }, + { + ServerNumber: 6, + ServerIP: "1.2.3.5", + ServerIPv6Net: "2a01:f48:111:4222::", }, }, mock: func(t *testing.T, tt *LBReconcilementTestCase) { @@ -1192,12 +1240,18 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { action = tt.fx.MockRemoveServerTarget(tt.initialLB, &hcloud.Server{ID: 4}, nil) tt.fx.MockWatchProgress(action, nil) + + action = tt.fx.MockRemoveIPTarget(tt.initialLB, net.ParseIP("1.2.3.4"), nil) + tt.fx.MockWatchProgress(action, nil) + + tt.fx.MockListRobotServers(tt.robotServers, nil) }, perform: func(t *testing.T, tt *LBReconcilementTestCase) { changed, err := tt.fx.LBOps.ReconcileHCLBTargets(tt.fx.Ctx, tt.initialLB, tt.service, tt.k8sNodes) assert.NoError(t, err) assert.True(t, changed) }, + cfg: config.HCCMConfiguration{LoadBalancer: config.LoadBalancerConfiguration{DisableIPv6: true}}, }, { name: "enable use of private network via default", @@ -1205,6 +1259,7 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { LoadBalancer: config.LoadBalancerConfiguration{ // Make sure the annotation overrides the default UsePrivateIP: true, + DisableIPv6: true, }, }, k8sNodes: []*corev1.Node{ @@ -1224,6 +1279,8 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { opts = hcloud.LoadBalancerAddServerTargetOpts{Server: &hcloud.Server{ID: 2}, UsePrivateIP: hcloud.Ptr(true)} action = tt.fx.MockAddServerTarget(tt.initialLB, opts, nil) tt.fx.MockWatchProgress(action, nil) + + tt.fx.MockListRobotServers(nil, nil) }, perform: func(t *testing.T, tt *LBReconcilementTestCase) { changed, err := tt.fx.LBOps.ReconcileHCLBTargets(tt.fx.Ctx, tt.initialLB, tt.service, tt.k8sNodes) @@ -1237,6 +1294,7 @@ func 
TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { LoadBalancer: config.LoadBalancerConfiguration{ // Make sure the annotation overrides the default UsePrivateIP: false, + DisableIPv6: true, }, }, k8sNodes: []*corev1.Node{ @@ -1259,6 +1317,8 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { opts = hcloud.LoadBalancerAddServerTargetOpts{Server: &hcloud.Server{ID: 2}, UsePrivateIP: hcloud.Ptr(true)} action = tt.fx.MockAddServerTarget(tt.initialLB, opts, nil) tt.fx.MockWatchProgress(action, nil) + + tt.fx.MockListRobotServers(nil, nil) }, perform: func(t *testing.T, tt *LBReconcilementTestCase) { changed, err := tt.fx.LBOps.ReconcileHCLBTargets(tt.fx.Ctx, tt.initialLB, tt.service, tt.k8sNodes) @@ -1272,6 +1332,7 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { LoadBalancer: config.LoadBalancerConfiguration{ // Make sure the annotation overrides the default UsePrivateIP: true, + DisableIPv6: true, }, }, k8sNodes: []*corev1.Node{ @@ -1300,6 +1361,8 @@ func TestLoadBalancerOps_ReconcileHCLBTargets(t *testing.T) { } action = tt.fx.MockAddServerTarget(tt.initialLB, opts, nil) tt.fx.MockWatchProgress(action, nil) + + tt.fx.MockListRobotServers(nil, nil) }, perform: func(t *testing.T, tt *LBReconcilementTestCase) { changed, err := tt.fx.LBOps.ReconcileHCLBTargets(tt.fx.Ctx, tt.initialLB, tt.service, tt.k8sNodes) diff --git a/internal/hcops/testing.go b/internal/hcops/testing.go index 2ed223cb7acae82a94a9874659c87f03b37ac67b..166c47893b5761c0ac741b104e1e0ba6394b3e1f 100644 --- a/internal/hcops/testing.go +++ b/internal/hcops/testing.go @@ -3,8 +3,11 @@ package hcops import ( "context" "math/rand" + "net" "testing" + hrobotmodels "github.com/syself/hrobot-go/models" + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/mocks" "github.com/hetznercloud/hcloud-go/v2/hcloud" ) @@ -16,6 +19,7 @@ type LoadBalancerOpsFixture struct { CertClient *mocks.CertificateClient ActionClient *mocks.ActionClient NetworkClient 
*mocks.NetworkClient + RobotClient *mocks.RobotClient LBOps *LoadBalancerOps @@ -29,6 +33,7 @@ func NewLoadBalancerOpsFixture(t *testing.T) *LoadBalancerOpsFixture { LBClient: &mocks.LoadBalancerClient{}, CertClient: &mocks.CertificateClient{}, NetworkClient: &mocks.NetworkClient{}, + RobotClient: &mocks.RobotClient{}, T: t, } @@ -36,12 +41,14 @@ func NewLoadBalancerOpsFixture(t *testing.T) *LoadBalancerOpsFixture { fx.LBClient.Test(t) fx.CertClient.Test(t) fx.NetworkClient.Test(t) + fx.RobotClient.Test(t) fx.LBOps = &LoadBalancerOps{ LBClient: fx.LBClient, CertOps: &CertificateOps{CertClient: fx.CertClient}, ActionClient: fx.ActionClient, NetworkClient: fx.NetworkClient, + RobotClient: fx.RobotClient, } return fx @@ -98,6 +105,28 @@ func (fx *LoadBalancerOpsFixture) MockRemoveServerTarget( return action } +func (fx *LoadBalancerOpsFixture) MockAddIPTarget( + lb *hcloud.LoadBalancer, opts hcloud.LoadBalancerAddIPTargetOpts, err error, +) *hcloud.Action { + action := &hcloud.Action{ID: rand.Int63()} + fx.LBClient.On("AddIPTarget", fx.Ctx, lb, opts).Return(action, nil, err) + return action +} + +func (fx *LoadBalancerOpsFixture) MockRemoveIPTarget( + lb *hcloud.LoadBalancer, ip net.IP, err error, +) *hcloud.Action { + action := &hcloud.Action{ID: rand.Int63()} + fx.LBClient.On("RemoveIPTarget", fx.Ctx, lb, ip).Return(action, nil, err) + return action +} + +func (fx *LoadBalancerOpsFixture) MockListRobotServers( + serverList []hrobotmodels.Server, err error, +) { + fx.RobotClient.On("ServerGetList").Return(serverList, err) +} + func (fx *LoadBalancerOpsFixture) MockWatchProgress(a *hcloud.Action, err error) { fx.ActionClient.MockWatchProgress(fx.Ctx, a, err) } diff --git a/internal/mocks/casts.go b/internal/mocks/casts.go index 8757463e137a11604dfb34c82370277018aa31c7..4a88c650bb762d657c96d82362412ae843bbfd9d 100644 --- a/internal/mocks/casts.go +++ b/internal/mocks/casts.go @@ -2,6 +2,7 @@ package mocks import ( "github.com/stretchr/testify/mock" + hrobotmodels 
"github.com/syself/hrobot-go/models" "github.com/hetznercloud/hcloud-go/v2/hcloud" ) @@ -38,6 +39,14 @@ func getLoadBalancerPtrS(args mock.Arguments, i int) []*hcloud.LoadBalancer { return v.([]*hcloud.LoadBalancer) } +func getRobotServers(args mock.Arguments, i int) []hrobotmodels.Server { + v := args.Get(i) + if v == nil { + return nil + } + return v.([]hrobotmodels.Server) +} + func getNetworkPtr(args mock.Arguments, i int) *hcloud.Network { v := args.Get(i) if v == nil { diff --git a/internal/mocks/loadbalancer.go b/internal/mocks/loadbalancer.go index 643da086775253af803d8d76c8cae0af8bd14e06..991071ef298c38d456bc3ccee90f8199ccd62ca7 100644 --- a/internal/mocks/loadbalancer.go +++ b/internal/mocks/loadbalancer.go @@ -2,6 +2,7 @@ package mocks import ( "context" + "net" "github.com/stretchr/testify/mock" @@ -90,6 +91,18 @@ func (m *LoadBalancerClient) RemoveServerTarget(ctx context.Context, lb *hcloud. return getActionPtr(args, 0), getResponsePtr(args, 1), args.Error(2) } +func (m *LoadBalancerClient) AddIPTarget( + ctx context.Context, lb *hcloud.LoadBalancer, opts hcloud.LoadBalancerAddIPTargetOpts, +) (*hcloud.Action, *hcloud.Response, error) { + args := m.Called(ctx, lb, opts) + return getActionPtr(args, 0), getResponsePtr(args, 1), args.Error(2) +} + +func (m *LoadBalancerClient) RemoveIPTarget(ctx context.Context, lb *hcloud.LoadBalancer, ip net.IP) (*hcloud.Action, *hcloud.Response, error) { + args := m.Called(ctx, lb, ip) + return getActionPtr(args, 0), getResponsePtr(args, 1), args.Error(2) +} + func (m *LoadBalancerClient) UpdateService( ctx context.Context, lb *hcloud.LoadBalancer, listenPort int, opts hcloud.LoadBalancerUpdateServiceOpts, ) (*hcloud.Action, *hcloud.Response, error) { diff --git a/internal/mocks/robot.go b/internal/mocks/robot.go new file mode 100644 index 0000000000000000000000000000000000000000..b6351faf2b52fec42c401e8fae607916bb9c8709 --- /dev/null +++ b/internal/mocks/robot.go @@ -0,0 +1,82 @@ +package mocks + +import ( + 
"github.com/stretchr/testify/mock" + hrobotmodels "github.com/syself/hrobot-go/models" +) + +type RobotClient struct { + mock.Mock +} + +func (m *RobotClient) ServerGetList() ([]hrobotmodels.Server, error) { + args := m.Called() + return getRobotServers(args, 0), args.Error(1) +} + +func (m *RobotClient) BootLinuxDelete(id int) (*hrobotmodels.Linux, error) { + panic("this method should not be called") +} +func (m *RobotClient) BootLinuxGet(id int) (*hrobotmodels.Linux, error) { + panic("this method should not be called") +} +func (m *RobotClient) BootLinuxSet(id int, input *hrobotmodels.LinuxSetInput) (*hrobotmodels.Linux, error) { + panic("this method should not be called") +} +func (m *RobotClient) BootRescueDelete(id int) (*hrobotmodels.Rescue, error) { + panic("this method should not be called") +} +func (m *RobotClient) BootRescueGet(id int) (*hrobotmodels.Rescue, error) { + panic("this method should not be called") +} +func (m *RobotClient) BootRescueSet(id int, input *hrobotmodels.RescueSetInput) (*hrobotmodels.Rescue, error) { + panic("this method should not be called") +} +func (m *RobotClient) FailoverGet(ip string) (*hrobotmodels.Failover, error) { + panic("this method should not be called") +} +func (m *RobotClient) FailoverGetList() ([]hrobotmodels.Failover, error) { + panic("this method should not be called") +} +func (m *RobotClient) GetVersion() string { + panic("this method should not be called") +} +func (m *RobotClient) IPGetList() ([]hrobotmodels.IP, error) { + panic("this method should not be called") +} +func (m *RobotClient) KeyGetList() ([]hrobotmodels.Key, error) { + panic("this method should not be called") +} +func (m *RobotClient) KeySet(input *hrobotmodels.KeySetInput) (*hrobotmodels.Key, error) { + panic("this method should not be called") +} +func (m *RobotClient) RDnsGet(ip string) (*hrobotmodels.Rdns, error) { + panic("this method should not be called") +} +func (m *RobotClient) RDnsGetList() ([]hrobotmodels.Rdns, error) { + 
panic("this method should not be called") +} +func (m *RobotClient) ResetGet(id int) (*hrobotmodels.Reset, error) { + panic("this method should not be called") +} +func (m *RobotClient) ResetSet(id int, input *hrobotmodels.ResetSetInput) (*hrobotmodels.ResetPost, error) { + panic("this method should not be called") +} +func (m *RobotClient) ServerGet(id int) (*hrobotmodels.Server, error) { + panic("this method should not be called") +} +func (m *RobotClient) ServerReverse(id int) (*hrobotmodels.Cancellation, error) { + panic("this method should not be called") +} +func (m *RobotClient) ServerSetName(id int, input *hrobotmodels.ServerSetNameInput) (*hrobotmodels.Server, error) { + panic("this method should not be called") +} +func (m *RobotClient) SetBaseURL(baseURL string) { + panic("this method should not be called") +} +func (m *RobotClient) SetUserAgent(userAgent string) { + panic("this method should not be called") +} +func (m *RobotClient) ValidateCredentials() error { + panic("this method should not be called") +} diff --git a/internal/providerid/providerid.go b/internal/providerid/providerid.go index ada5d69d763931719e227c86169b5e9a243e3b1b..53892bff90678f7936700dfc2970552288920523 100644 --- a/internal/providerid/providerid.go +++ b/internal/providerid/providerid.go @@ -7,30 +7,63 @@ import ( ) const ( - // providerPrefix is the prefix for all provider IDs. It MUST not be changed, - // otherwise existing nodes will not be recognized anymore. - providerPrefix = "hcloud://" + // prefixCloud is the prefix for Cloud Server provider IDs. + // + // It MUST not be changed, otherwise existing nodes will not be recognized anymore. + prefixCloud = "hcloud://" + + // prefixRobot is the prefix for Robot Server provider IDs. + // + // It MUST not be changed, otherwise existing nodes will not be recognized anymore. + prefixRobot = "hrobot://" + + // prefixRobotLegacy is the prefix used by the Syself Fork for Robot Server provider IDs. 
+ // This Prefix is no longer used for new nodes, instead [prefixRobot] should be used. + // + // It MUST not be changed, otherwise existing nodes will not be recognized anymore. + prefixRobotLegacy = "hcloud://bm-" ) -// ToServerID converts a ProviderID to a server ID. -func ToServerID(providerID string) (int64, error) { - if !strings.HasPrefix(providerID, providerPrefix) { - return 0, fmt.Errorf("providerID does not have the expected prefix %s: %s", providerPrefix, providerID) +// ToServerID parses the Cloud or Robot Server ID from a ProviderID. +// +// This method supports all formats for the ProviderID that were ever used. +// If a format is ever dropped from this method the Nodes that still use that +// format will get abandoned and can no longer be processed by HCCM. +func ToServerID(providerID string) (id int64, isCloudServer bool, err error) { + idString := "" + switch { + case strings.HasPrefix(providerID, prefixRobot): + idString = strings.ReplaceAll(providerID, prefixRobot, "") + + case strings.HasPrefix(providerID, prefixRobotLegacy): + // This case needs to be before [prefixCloud], as [prefixCloud] is a superset of [prefixRobotLegacy] + idString = strings.ReplaceAll(providerID, prefixRobotLegacy, "") + + case strings.HasPrefix(providerID, prefixCloud): + isCloudServer = true + idString = strings.ReplaceAll(providerID, prefixCloud, "") + + default: + return 0, false, fmt.Errorf("providerID does not have one of the the expected prefixes (%s, %s, %s): %s", prefixCloud, prefixRobot, prefixRobotLegacy, providerID) } - idString := strings.ReplaceAll(providerID, providerPrefix, "") if idString == "" { - return 0, fmt.Errorf("providerID is missing a serverID: %s", providerID) + return 0, false, fmt.Errorf("providerID is missing a serverID: %s", providerID) } - id, err := strconv.ParseInt(idString, 10, 64) + id, err = strconv.ParseInt(idString, 10, 64) if err != nil { - return 0, fmt.Errorf("unable to parse server id: %s", providerID) + return 0, false, 
fmt.Errorf("unable to parse server id: %s", providerID) } - return id, nil + return id, isCloudServer, nil +} + +// FromCloudServerID generates the canonical ProviderID for a Cloud Server. +func FromCloudServerID(serverID int64) string { + return fmt.Sprintf("%s%d", prefixCloud, serverID) } -// FromServerID converts a server ID to a ProviderID. -func FromServerID(serverID int64) string { - return fmt.Sprintf("%s%d", providerPrefix, serverID) +// FromRobotServerNumber generates the canonical ProviderID for a Robot Server. +func FromRobotServerNumber(serverNumber int) string { + return fmt.Sprintf("%s%d", prefixRobot, serverNumber) } diff --git a/internal/providerid/providerid_test.go b/internal/providerid/providerid_test.go new file mode 100644 index 0000000000000000000000000000000000000000..eed44f7a442026988df06fcd9897890548cc5ff5 --- /dev/null +++ b/internal/providerid/providerid_test.go @@ -0,0 +1,216 @@ +package providerid + +import ( + "strings" + "testing" +) + +func TestFromCloudServerID(t *testing.T) { + tests := []struct { + name string + serverID int64 + want string + }{ + { + name: "simple id", + serverID: 1234, + want: "hcloud://1234", + }, + { + name: "large id", + serverID: 2251799813685247, + want: "hcloud://2251799813685247", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := FromCloudServerID(tt.serverID); got != tt.want { + t.Errorf("FromCloudServerID() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestFromRobotServerNumber(t *testing.T) { + tests := []struct { + name string + serverNumber int + want string + }{ + { + name: "simple id", + serverNumber: 4321, + want: "hrobot://4321", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := FromRobotServerNumber(tt.serverNumber); got != tt.want { + t.Errorf("FromRobotServerNumber() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestToServerID(t *testing.T) { + tests := []struct { + name string + providerID string + 
wantID int64 + wantIsCloudServer bool + wantErr bool + }{ + { + name: "[cloud] simple id", + providerID: "hcloud://1234", + wantID: 1234, + wantIsCloudServer: true, + wantErr: false, + }, + { + name: "[cloud] large id", + providerID: "hcloud://2251799813685247", + wantID: 2251799813685247, + wantIsCloudServer: true, + wantErr: false, + }, + { + name: "[cloud] invalid id", + providerID: "hcloud://my-cloud", + wantID: 0, + wantIsCloudServer: false, + wantErr: true, + }, + { + name: "[cloud] missing id", + providerID: "hcloud://", + wantID: 0, + wantIsCloudServer: false, + wantErr: true, + }, + { + name: "[robot] simple id", + providerID: "hrobot://4321", + wantID: 4321, + wantIsCloudServer: false, + wantErr: false, + }, + { + name: "[robot] invalid id", + providerID: "hrobot://my-robot", + wantID: 0, + wantIsCloudServer: false, + wantErr: true, + }, + { + name: "[robot] missing id", + providerID: "hrobot://", + wantID: 0, + wantIsCloudServer: false, + wantErr: true, + }, + { + name: "[robot-syself] simple id", + providerID: "hcloud://bm-4321", + wantID: 4321, + wantIsCloudServer: false, + wantErr: false, + }, + { + name: "[robot-syself] invalid id", + providerID: "hcloud://bm-my-robot", + wantID: 0, + wantIsCloudServer: false, + wantErr: true, + }, + { + name: "[robot-syself] missing id", + providerID: "hcloud://bm-", + wantID: 0, + wantIsCloudServer: false, + wantErr: true, + }, + { + name: "unknown format", + providerID: "foobar/321", + wantID: 0, + wantIsCloudServer: false, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotID, gotIsCloudServer, err := ToServerID(tt.providerID) + if (err != nil) != tt.wantErr { + t.Errorf("ToServerID() error = %v, wantErr %v", err, tt.wantErr) + return + } + if gotID != tt.wantID { + t.Errorf("ToServerID() gotID = %v, want %v", gotID, tt.wantID) + } + if gotIsCloudServer != tt.wantIsCloudServer { + t.Errorf("ToServerID() gotIsCloudServer = %v, want %v", gotIsCloudServer, 
tt.wantIsCloudServer) + } + }) + } +} + +func FuzzRoundTripCloud(f *testing.F) { + f.Add(int64(123123123)) + + f.Fuzz(func(t *testing.T, serverID int64) { + providerID := FromCloudServerID(serverID) + id, isCloudServer, err := ToServerID(providerID) + if err != nil { + t.Fatal(err) + } + if id != serverID { + t.Fatalf("expected %d, got %d", serverID, id) + } + if !isCloudServer { + t.Fatalf("expected %t, got %t", true, isCloudServer) + } + }) +} + +func FuzzRoundTripRobot(f *testing.F) { + f.Add(123123123) + + f.Fuzz(func(t *testing.T, serverNumber int) { + providerID := FromRobotServerNumber(serverNumber) + id, isCloudServer, err := ToServerID(providerID) + if err != nil { + t.Fatal(err) + } + if int(id) != serverNumber { + t.Fatalf("expected %d, got %d", serverNumber, id) + } + if isCloudServer { + t.Fatalf("expected %t, got %t", false, isCloudServer) + } + }) +} + +func FuzzToServerId(f *testing.F) { + f.Add("hcloud://123123123") + f.Add("hrobot://123123123") + f.Add("hcloud://bm-123123123") + + f.Fuzz(func(t *testing.T, providerID string) { + _, _, err := ToServerID(providerID) + if err != nil { + if strings.HasPrefix(err.Error(), "providerID does not have one of the the expected prefixes") { + return + } + if strings.HasPrefix(err.Error(), "providerID is missing a serverID") { + return + } + if strings.HasPrefix(err.Error(), "unable to parse server id") { + return + } + + t.Fatal(err) + } + }) +} diff --git a/internal/robot/client.go b/internal/robot/client.go new file mode 100644 index 0000000000000000000000000000000000000000..aa3522bf2c5fe57a3ce9fe5bcdbe4cd2c66fe945 --- /dev/null +++ b/internal/robot/client.go @@ -0,0 +1,90 @@ +package robot + +import ( + "sync" + "time" + + hrobot "github.com/syself/hrobot-go" + hrobotmodels "github.com/syself/hrobot-go/models" +) + +type cacheRobotClient struct { + robotClient hrobot.RobotClient + timeout time.Duration + + lastUpdate time.Time + // mutex is necessary to synchronize parallel access to the cache + mutex 
sync.Mutex + + // cache + servers []hrobotmodels.Server + serversByID map[int]*hrobotmodels.Server +} + +func NewClient(robotClient hrobot.RobotClient, cacheTimeout time.Duration) Client { + return &cacheRobotClient{ + timeout: cacheTimeout, + robotClient: robotClient, + + serversByID: make(map[int]*hrobotmodels.Server), + } +} + +func (c *cacheRobotClient) ServerGet(id int) (*hrobotmodels.Server, error) { + c.mutex.Lock() + defer c.mutex.Unlock() + + if err := c.updateCacheIfNecessary(); err != nil { + return nil, err + } + + server, found := c.serversByID[id] + if !found { + // return not found error + return nil, hrobotmodels.Error{Code: hrobotmodels.ErrorCodeServerNotFound, Message: "server not found"} + } + + return server, nil +} + +func (c *cacheRobotClient) ServerGetList() ([]hrobotmodels.Server, error) { + c.mutex.Lock() + defer c.mutex.Unlock() + + if err := c.updateCacheIfNecessary(); err != nil { + return nil, err + } + + return c.servers, nil +} + +// Make sure to lock the mutex before calling updateCacheIfNecessary. +func (c *cacheRobotClient) updateCacheIfNecessary() error { + nextUpdate := c.lastUpdate.Add(c.timeout) + if time.Now().Before(nextUpdate) { + return nil + } + + servers, err := c.robotClient.ServerGetList() + if err != nil { + return err + } + + // populate servers + c.servers = servers + + // remove all entries from map and populate it freshly + c.serversByID = make(map[int]*hrobotmodels.Server) + for i, server := range servers { + c.serversByID[server.ServerNumber] = &servers[i] + } + + // set time of last update + c.lastUpdate = time.Now() + return nil +} + +// ResetGet does not use the cache, as we need up-to-date information for its function. 
+func (c *cacheRobotClient) ResetGet(id int) (*hrobotmodels.Reset, error) { + return c.robotClient.ResetGet(id) +} diff --git a/internal/robot/interface.go b/internal/robot/interface.go new file mode 100644 index 0000000000000000000000000000000000000000..64e3d934ce4119887f48afde5940d83dc85f90a6 --- /dev/null +++ b/internal/robot/interface.go @@ -0,0 +1,11 @@ +package robot + +import ( + hrobotmodels "github.com/syself/hrobot-go/models" +) + +type Client interface { + ServerGet(id int) (*hrobotmodels.Server, error) + ServerGetList() ([]hrobotmodels.Server, error) + ResetGet(id int) (*hrobotmodels.Reset, error) +} diff --git a/skaffold.yaml b/skaffold.yaml index 049727d5132dce0e10f5d75a613815695456643e..5e407e575e71c1a1dc207a678cd60496de03332d 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -22,3 +22,14 @@ manifests: chartPath: chart setValues: networking.enabled: true + +profiles: + # Clusters with Robot Servers do not support the native Routing functionality right now. + - name: robot + patches: + - op: replace + path: /manifests/helm/releases/0/setValues/networking.enabled + value: false + - op: add + path: /manifests/helm/releases/0/setValues/robot.enabled + value: true diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go index d18ab167f1f8a183ceac1f1247560f792016b4ec..ac2b7fdf694c75d84d3687d1205c9dd47f8ae8c1 100644 --- a/tests/e2e/cloud_test.go +++ b/tests/e2e/cloud_test.go @@ -1,4 +1,4 @@ -//go:build e2e +//go:build e2e && !robot package e2e diff --git a/tests/e2e/robot_test.go b/tests/e2e/robot_test.go new file mode 100644 index 0000000000000000000000000000000000000000..58de11015aabb800b8e85902d851be05a66d5af9 --- /dev/null +++ b/tests/e2e/robot_test.go @@ -0,0 +1,93 @@ +//go:build e2e && robot + +package e2e + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/annotation" + 
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/providerid" +) + +func TestRobotClientIsAvailable(t *testing.T) { + assert.NotNil(t, testCluster.hrobot) +} + +func TestNodeSetCorrectNodeLabelsAndIPAddressesRobot(t *testing.T) { + t.Parallel() + ctx := context.Background() + + // Get a random Robot server from all Nodes in the cluster + nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{ + LabelSelector: "instance.hetzner.cloud/is-root-server=true", + }) + assert.NoError(t, err) + assert.GreaterOrEqual(t, len(nodes.Items), 1) + node := nodes.Items[0] + + // Parse the server number from the ProviderID + id, isCloudServer, err := providerid.ToServerID(node.Spec.ProviderID) + assert.NoError(t, err) + assert.False(t, isCloudServer) + + // Get the server from the Robot API to cross-check Labels + server, err := testCluster.hrobot.ServerGet(int(id)) + assert.NoError(t, err) + + labels := node.Labels + expectedLabels := map[string]string{ + "kubernetes.io/hostname": server.Name, + "kubernetes.io/os": "linux", + "kubernetes.io/arch": "amd64", + } + for expectedLabel, expectedValue := range expectedLabels { + assert.Equal(t, expectedValue, labels[expectedLabel], "node does not have expected label %s", expectedLabel) + } + + expectedLabelsSet := []string{ + "node.kubernetes.io/instance-type", + "topology.kubernetes.io/region", + "topology.kubernetes.io/zone", + } + for _, expectedLabel := range expectedLabelsSet { + _, ok := labels[expectedLabel] + assert.True(t, ok, "node is missing expected label %s", expectedLabel) + } + + for _, address := range node.Status.Addresses { + if address.Type == corev1.NodeExternalIP { + expectedIP := server.ServerIP + assert.Equal(t, expectedIP, address.Address, "node has unexpected external ip") + } + } +} + +func TestServiceLoadBalancersRobot(t *testing.T) { + t.Parallel() + + lbTest := lbTestHelper{ + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-robot-only", + } + + pod 
:= lbTest.DeployTestPod() + + lbSvc := lbTest.ServiceDefinition(pod, map[string]string{ + string(annotation.LBLocation): "nbg1", + // Only add the Robot server as a Load Balancer target + string(annotation.LBNodeSelector): "instance.hetzner.cloud/is-root-server=true", + }) + + lbSvc, err := lbTest.CreateService(lbSvc) + assert.NoError(t, err) + + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + + lbTest.TearDown() +} diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index ddf02a4dc35e76d2296d3aba8e146fbe60fbc3fb..6bc232351e09e35bbe25529fa5b7e0bb3c936956 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -14,6 +14,7 @@ import ( "testing" "time" + hrobot "github.com/syself/hrobot-go" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -35,6 +36,7 @@ func init() { type TestCluster struct { hcloud *hcloud.Client + hrobot hrobot.RobotClient k8sClient *kubernetes.Clientset certificates []*hcloud.Certificate scope string @@ -48,6 +50,7 @@ func (tc *TestCluster) Start() error { } tc.scope = scopeButcher.ReplaceAllString(tc.scope, "-") + // Hetzner Cloud Client token := os.Getenv("HCLOUD_TOKEN") if token == "" { buf, err := os.ReadFile(fmt.Sprintf("../../hack/.token-%s", tc.scope)) @@ -68,6 +71,13 @@ func (tc *TestCluster) Start() error { hcloudClient := hcloud.NewClient(opts...) tc.hcloud = hcloudClient + // Hetzner Robot Client + if enabled := os.Getenv("ROBOT_ENABLED"); enabled == "true" { + robotUser := os.Getenv("ROBOT_USER") + robotPassword := os.Getenv("ROBOT_PASSWORD") + tc.hrobot = hrobot.NewBasicAuthClient(robotUser, robotPassword) + } + err := os.Setenv("KUBECONFIG", "../../hack/.kubeconfig-"+tc.scope) if err != nil { return err