From 59d4f65480f3779c9a2f35ae37a3da257fd120c5 Mon Sep 17 00:00:00 2001 From: Sheogorath <sheogorath@shivering-isles.com> Date: Sun, 26 Mar 2023 21:40:33 +0200 Subject: [PATCH] fix(drivers): Downgrade amd-gpu driver deployment to version 1.18.0 Due to a currently unsolved issue, the device plugin reports 0 available AMD GPUs on systems that have fully functional GPUs ready to go. This change in behaviour appeared when upgrading from Kubernetes 1.24.11 to 1.25.8 and its cause is currently unclear. References: https://github.com/RadeonOpenCompute/k8s-device-plugin/issues/31 --- infrastructure/drivers/amd-gpu.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/infrastructure/drivers/amd-gpu.yaml b/infrastructure/drivers/amd-gpu.yaml index 000f78a8c..09c6e7216 100644 --- a/infrastructure/drivers/amd-gpu.yaml +++ b/infrastructure/drivers/amd-gpu.yaml @@ -25,6 +25,11 @@ spec: version: 0.6.0 interval: 5m values: + nfd: + enabled: true + dp: + image: + tag: 1.18.0 namespace: drivers-system securityContext: allowPrivilegeEscalation: false -- GitLab