# Source: production-stack/tutorials/assets/values-01-minimal-amd-examp...
# (path truncated in the original listing)
# Original listing metadata: 29 lines, 630 B, YAML

# Minimal serving-engine values: one replica of facebook/opt-125m requesting
# a single "amd.com/gpu" resource. `repository`/`tag` presumably select the
# container image (rocm/vllm:instinct_main) — verify against the chart.
#
# NOTE(review): the original listing had lost all indentation. The nesting
# below is reconstructed: per-model settings (including nodeSelectorTerms)
# live under the modelSpec entry, and tolerations sits directly under
# servingEngineSpec. Confirm both placements against the chart's values
# schema before deploying.
servingEngineSpec:
  runtimeClassName: ""
  modelSpec:
    - name: "opt125m"
      repository: "rocm/vllm"
      tag: "instinct_main"
      modelURL: "facebook/opt-125m"
      replicaCount: 1
      requestCPU: 6
      requestMemory: "16Gi"
      requestGPU: 1
      requestGPUType: "amd.com/gpu"
      # Placeholder: substitute your own token. Avoid committing a real
      # token to version control.
      hf_token: "<YOUR HF TOKEN>"
      # Placeholder label key/value: replace with a label that is actually
      # present on the nodes this model should be scheduled onto.
      nodeSelectorTerms:
        - matchExpressions:
            - key: "your-node-label"
              operator: "In"
              values: ["your-node-label-value"]
  # Placeholder taint key/value: replace with the taint set on the target
  # nodes, or remove this section if the nodes are not tainted.
  tolerations:
    - key: "your-node-taint"
      operator: "Equal"
      value: "your-node-taint-value"
      effect: "NoSchedule"