From 59d4f65480f3779c9a2f35ae37a3da257fd120c5 Mon Sep 17 00:00:00 2001
From: Sheogorath <sheogorath@shivering-isles.com>
Date: Sun, 26 Mar 2023 21:40:33 +0200
Subject: [PATCH] fix(drivers): Downgrade amd-gpu driver deployment to version
 1.18.0

Due to a currently unsolved issue, the device plugin reports 0 available
AMD GPUs on systems that have fully functional GPUs ready to go. This
change in behaviour appeared when upgrading from Kubernetes 1.24.11 to
1.25.8, and its cause is currently unclear.

References:
https://github.com/RadeonOpenCompute/k8s-device-plugin/issues/31
---
 infrastructure/drivers/amd-gpu.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/infrastructure/drivers/amd-gpu.yaml b/infrastructure/drivers/amd-gpu.yaml
index 000f78a8c..09c6e7216 100644
--- a/infrastructure/drivers/amd-gpu.yaml
+++ b/infrastructure/drivers/amd-gpu.yaml
@@ -25,6 +25,11 @@ spec:
       version: 0.6.0
   interval: 5m
   values:
+    nfd:
+      enabled: true
+    dp:
+      image:
+        tag: 1.18.0
     namespace: drivers-system
     securityContext:
       allowPrivilegeEscalation: false
-- 
GitLab