inference: Fix failing llama_config unit tests

Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
Author: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
Date:   2025-07-15 16:39:43 +02:00
parent ca187f9908
commit 7d39c7624c
2 changed files with 6 additions and 1 deletion


@@ -9,10 +9,11 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"time"
 )

 // getVRAMSize returns total system GPU memory in bytes
-func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, error) {
+func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
 	if runtime.GOARCH == "arm64" {
 		// TODO(p1-0tr): For now, on windows/arm64, stick to the old behaviour. This will
 		// require backend.GetRequiredMemoryForModel to return 1 as well.
@@ -21,6 +22,7 @@ func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, error) {
 	nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
+	ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
 	cmd := exec.CommandContext(ctx, nvGPUInfoBin)
 	out, err := cmd.CombinedOutput()
 	if err != nil {
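A side note on the hunk above: context.WithTimeout returns a cancel function alongside the context, and discarding it with ctx, _ := is flagged by go vet's lostcancel check, because the timer's resources are only freed once the 30-second deadline expires. The sketch below is illustrative, not the commit's code, and shows the more common defer-cancel form; the binary name is only a stand-in.

package main

import (
	"context"
	"fmt"
	"os/exec"
	"time"
)

// runWithTimeout bounds an external command to 30 seconds, mirroring the
// change above but deferring cancel so the timer is released promptly.
func runWithTimeout(bin string) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	// CombinedOutput kills the process if the deadline passes before it exits.
	return exec.CommandContext(ctx, bin).CombinedOutput()
}

func main() {
	// "com.docker.nv-gpu-info.exe" stands in for the real tool; any binary works.
	out, err := runWithTimeout("com.docker.nv-gpu-info.exe")
	if err != nil {
		fmt.Println("GPU query failed:", err)
		return
	}
	fmt.Printf("%s", out)
}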


@@ -91,6 +91,7 @@ func TestGetArgs(t *testing.T) {
 				"--metrics",
 				"--model", modelPath,
 				"--host", socket,
+				"--ctx-size", "4096",
 			},
 		},
 		{
@@ -106,6 +107,7 @@ func TestGetArgs(t *testing.T) {
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
+				"--ctx-size", "4096",
 			},
 		},
 		{
@@ -165,6 +167,7 @@ func TestGetArgs(t *testing.T) {
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
+				"--ctx-size", "4096",
 				"--some", "flag", // model config takes precedence
 			},
 		},
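All three expectation updates add the same "--ctx-size", "4096" pair, which suggests the llama.cpp argument builder these tests exercise now appends a default context size unconditionally. A hypothetical reconstruction of that assembly order, with invented names (the real builder lives alongside llama_config_test.go):

package llamaconfig

// defaultContextSize mirrors the "4096" the tests now expect; the actual
// constant name and location are assumptions.
const defaultContextSize = "4096"

// getArgs sketches the flag order the tests check: base flags, optional
// --embeddings, the new default --ctx-size, then model-config flags last,
// which is why "--some", "flag" still follows --ctx-size in the final hunk
// (later llama.cpp flags override earlier ones, so model config takes precedence).
func getArgs(modelPath, socket string, embeddings bool, modelCfgFlags ...string) []string {
	args := []string{
		"--metrics",
		"--model", modelPath,
		"--host", socket,
	}
	if embeddings {
		args = append(args, "--embeddings")
	}
	args = append(args, "--ctx-size", defaultContextSize)
	return append(args, modelCfgFlags...)
}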