inference: Fix failing llama_config unit tests

Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
Author: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
Date:   2025-07-15 16:39:43 +02:00
parent ca187f9908
commit 7d39c7624c
2 changed files with 6 additions and 1 deletion


@@ -9,10 +9,11 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"time"
 )

 // getVRAMSize returns total system GPU memory in bytes
-func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, error) {
+func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
 	if runtime.GOARCH == "arm64" {
 		// TODO(p1-0tr): For now, on windows/arm64, stick to the old behaviour. This will
 		// require backend.GetRequiredMemoryForModel to return 1 as well.
@@ -21,6 +22,7 @@ func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, error) {
 	nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
+	ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
 	cmd := exec.CommandContext(ctx, nvGPUInfoBin)
 	out, err := cmd.CombinedOutput()
 	if err != nil {
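A side note on the hunk above: context.WithTimeout returns a cancel function alongside the context, and discarding it with ctx, _ := is flagged by go vet's lostcancel check, because the timer's resources are only freed once the 30-second deadline expires. The sketch below is illustrative, not the commit's code, and shows the more common defer-cancel form; the binary name is only a stand-in.

package main

import (
	"context"
	"fmt"
	"os/exec"
	"time"
)

// runWithTimeout bounds an external command to 30 seconds, mirroring the
// change above but deferring cancel so the timer is released promptly.
func runWithTimeout(bin string) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	// CombinedOutput kills the process if the deadline passes before it exits.
	return exec.CommandContext(ctx, bin).CombinedOutput()
}

func main() {
	// "com.docker.nv-gpu-info.exe" stands in for the real tool; any binary works.
	out, err := runWithTimeout("com.docker.nv-gpu-info.exe")
	if err != nil {
		fmt.Println("GPU query failed:", err)
		return
	}
	fmt.Printf("%s", out)
}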


@@ -91,6 +91,7 @@ func TestGetArgs(t *testing.T) {
 				"--metrics",
 				"--model", modelPath,
 				"--host", socket,
+				"--ctx-size", "4096",
 			},
 		},
 		{
@@ -106,6 +107,7 @@ func TestGetArgs(t *testing.T) {
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
+				"--ctx-size", "4096",
 			},
 		},
 		{
@@ -165,6 +167,7 @@ func TestGetArgs(t *testing.T) {
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
+				"--ctx-size", "4096",
 				"--some", "flag", // model config takes precedence
 			},
 		},
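All three expectation updates add the same "--ctx-size", "4096" pair, which suggests the llama.cpp argument builder these tests exercise now appends a default context size unconditionally. A hypothetical reconstruction of that assembly order, with invented names (the real builder lives alongside llama_config_test.go):

package llamaconfig

// defaultContextSize mirrors the "4096" the tests now expect; the actual
// constant name and location are assumptions.
const defaultContextSize = "4096"

// getArgs sketches the flag order the tests check: base flags, optional
// --embeddings, the new default --ctx-size, then model-config flags last,
// which is why "--some", "flag" still follows --ctx-size in the final hunk
// (later llama.cpp flags override earlier ones, so model config takes precedence).
func getArgs(modelPath, socket string, embeddings bool, modelCfgFlags ...string) []string {
	args := []string{
		"--metrics",
		"--model", modelPath,
		"--host", socket,
	}
	if embeddings {
		args = append(args, "--embeddings")
	}
	args = append(args, "--ctx-size", defaultContextSize)
	return append(args, modelCfgFlags...)
}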