Fix no-schedule issue for the GPU operator

2021-02-25 11:28:22 +01:00 · 2021-02-25 11:28:22 +01:00 · d196efcb82
parent 7759c754cb
commit d196efcb82
1 changed files with 18 additions and 1 deletions
--- a/docs/gpu.md
+++ b/docs/gpu.md
@@ -41,22 +41,37 @@ In order to match the kOps environment, create a `values.yaml` file with the fol
 operator:
  nodeSelector:
    kops.k8s.io/instancegroup: gpu-nodes
+  tolerations:
+  - key: nvidia.com/gpu
+    operator: Exists

 driver:
  nodeSelector:
    kops.k8s.io/instancegroup: gpu-nodes
+  tolerations:
+  - key: nvidia.com/gpu
+    operator: Exists

 toolkit:
  nodeSelector:
    kops.k8s.io/instancegroup: gpu-nodes
+  tolerations:
+  - key: nvidia.com/gpu
+    operator: Exists

 devicePlugin:
  nodeSelector:
    kops.k8s.io/instancegroup: gpu-nodes
+  tolerations:
+  - key: nvidia.com/gpu
+    operator: Exists

 dcgmExporter:
  nodeSelector:
    kops.k8s.io/instancegroup: gpu-nodes
+  tolerations:
+  - key: nvidia.com/gpu
+    operator: Exists

 gfd:
  nodeSelector:
@@ -64,12 +79,14 @@ gfd:
  tolerations:
  - key: nvidia.com/gpu
    operator: Exists
-    effect: NoSchedule

 node-feature-discovery:
  worker:
    nodeSelector:
      kops.k8s.io/instancegroup: gpu-nodes
+    tolerations:
+    - key: nvidia.com/gpu
+      operator: Exists
 ```

 Once you have installed the _helm chart_ you should be able to see the GPU operator resources being spawned in the `gpu-operator-resources` namespace.