inference: Fix failing llama_config unit tests
Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
parent ca187f9908
commit 7d39c7624c
@@ -9,10 +9,11 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"time"
 )

 // getVRAMSize returns total system GPU memory in bytes
-func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, error) {
+func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
 	if runtime.GOARCH == "arm64" {
 		// TODO(p1-0tr): For now, on windows/arm64, stick to the old behaviour. This will
 		// require backend.GetRequiredMemoryForModel to return 1 as well.
@@ -21,6 +22,7 @@ func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, error) {

 	nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")

+	ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
 	cmd := exec.CommandContext(ctx, nvGPUInfoBin)
 	out, err := cmd.CombinedOutput()
 	if err != nil {
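Taken together, the two hunks above move context handling inside getVRAMSize: callers no longer supply a context.Context, and the com.docker.nv-gpu-info.exe probe is bounded by a 30-second timeout created locally. A minimal sketch of the resulting function shape, assuming the windows/arm64 early return and the output parsing (neither is fully visible in the hunks shown):

package vram // placeholder; the real file's package name is not shown in the diff

import (
	"context"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"time"
)

// getVRAMSize returns total system GPU memory in bytes.
func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
	if runtime.GOARCH == "arm64" {
		// Assumed: keep the old windows/arm64 behaviour, per the TODO in the diff.
		return 1, nil
	}

	nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")

	// The diff discards the CancelFunc ("ctx, _ :="); deferring cancel() as
	// here releases the timer promptly and is otherwise equivalent.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	cmd := exec.CommandContext(ctx, nvGPUInfoBin)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return 0, err
	}

	// Assumed parse: the strconv/strings imports suggest the helper binary
	// prints a number; the real parsing lies outside the hunks shown.
	return strconv.ParseUint(strings.TrimSpace(string(out)), 10, 64)
}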
@@ -91,6 +91,7 @@ func TestGetArgs(t *testing.T) {
 				"--metrics",
 				"--model", modelPath,
 				"--host", socket,
+				"--ctx-size", "4096",
 			},
 		},
 		{
@@ -106,6 +107,7 @@ func TestGetArgs(t *testing.T) {
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
+				"--ctx-size", "4096",
 			},
 		},
 		{
@@ -165,6 +167,7 @@ func TestGetArgs(t *testing.T) {
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
+				"--ctx-size", "4096",
 				"--some", "flag", // model config takes precedence
 			},
 		},
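All three test hunks add the same expectation: the argument list built for the llama.cpp runner now always includes a default "--ctx-size", "4096", with an explicitly configured flag still taking precedence, per the comment in the last hunk. A hedged sketch of how one such table-driven case might look; the struct fields, fixture values, and the getArgs call are assumptions, not shown in the diff:

package llamaconfig_test // placeholder; the real test file is not named on this page

import "testing"

func TestGetArgs(t *testing.T) {
	modelPath := "/models/llama.gguf" // placeholder fixture
	socket := "/tmp/inference.sock"   // placeholder fixture

	cases := []struct {
		name string
		want []string
	}{
		{
			name: "embeddings config gets the default context size",
			want: []string{
				"--model", modelPath,
				"--host", socket,
				"--embeddings",
				"--ctx-size", "4096", // the expectation these hunks add
			},
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// got := getArgs(...) // real signature not visible in the diff
			// compare got against tc.want, e.g. with reflect.DeepEqual
		})
	}
}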