inference: Fix nv-gpu-info path and wrap errors

Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
Piotr Stankiewicz 2025-07-17 13:38:35 +02:00 committed by Piotr
parent ecc3f8dde4
commit 263e4c7732
3 changed files with 10 additions and 5 deletions

View File

@@ -20,7 +20,7 @@ func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
return 1, nil
}
nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "bin", "com.docker.nv-gpu-info.exe")
ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
cmd := exec.CommandContext(ctx, nvGPUInfoBin)

View File

@@ -226,19 +226,19 @@ func (l *llamaCpp) GetDiskUsage() (int64, error) {
func (l *llamaCpp) GetRequiredMemoryForModel(model string, config *inference.BackendConfiguration) (*inference.RequiredMemory, error) {
mdl, err := l.modelManager.GetModel(model)
if err != nil {
return nil, err
return nil, fmt.Errorf("getting model(%s): %w", model, err)
}
mdlPath, err := mdl.GGUFPath()
if err != nil {
return nil, err
return nil, fmt.Errorf("getting gguf path for model(%s): %w", model, err)
}
mdlGguf, err := parser.ParseGGUFFile(mdlPath)
if err != nil {
return nil, err
return nil, fmt.Errorf("parsing gguf(%s): %w", mdlPath, err)
}
mdlConfig, err := mdl.Config()
if err != nil {
return nil, err
return nil, fmt.Errorf("accessing model(%s) config: %w", model, err)
}
contextSize := GetContextSize(&mdlConfig, config)

View File

@@ -145,9 +145,12 @@ func newLoader(
}
// Compute the amount of available memory.
// TODO(p1-0tr): improve error handling
vramSize, err := gpuInfo.GetVRAMSize()
if err != nil {
log.Warnf("Could not read VRAM size: %s", err)
} else {
log.Infof("Running on system with %dMB VRAM", vramSize/1024.0/1024.0)
}
hostInfo, err := sysinfo.Host()
if err != nil {
@@ -156,6 +159,8 @@ func newLoader(
ramSize, err := hostInfo.Memory()
if err != nil {
log.Warnf("Could not read host RAM size: %s", err)
} else {
log.Infof("Running on system with %dMB RAM", ramSize.Total/1024.0/1024.0)
}
totalMemory := memory{