Use nv-gpu-info on Windows to get VRAM size

Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
2025-07-14 15:12:05 +02:00 · 2025-07-14 15:12:05 +02:00 · ea3bb71830
parent 6e096b2caa
commit ea3bb71830
7 changed files with 38 additions and 103 deletions
--- a/main.go
+++ b/main.go
@ -90,7 +90,7 @@ func main() {
 		log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
 	}

-	gpuInfo := gpuinfo.New()
+	gpuInfo := gpuinfo.New(llamaServerPath)

 	scheduler := scheduling.NewScheduler(
 		log,
--- a/pkg/gpuinfo/gpuinfo.go
+++ b/pkg/gpuinfo/gpuinfo.go
@ -1,11 +1,17 @@
 package gpuinfo

-type GPUInfo struct{}
+type GPUInfo struct {
+	// modelRuntimeInstallPath is the location where DMR installed its llama-server
+	// and accompanying tools
+	modelRuntimeInstallPath string
+}

-func New() *GPUInfo {
-	return &GPUInfo{}
+func New(modelRuntimeInstallPath string) *GPUInfo {
+	return &GPUInfo{
+		modelRuntimeInstallPath: modelRuntimeInstallPath,
+	}
 }

 func (g *GPUInfo) GetVRAMSize() (uint64, error) {
-	return getVRAMSize()
+	return getVRAMSize(g.modelRuntimeInstallPath)
 }
--- a/pkg/gpuinfo/memory_darwin.go
+++ b/pkg/gpuinfo/memory_darwin.go
@ -8,7 +8,7 @@ import "C"
 import "errors"

 // getVRAMSize returns total system GPU memory in bytes
-func getVRAMSize() (uint64, error) {
+func getVRAMSize(_ string) (uint64, error) {
 	vramSize := C.getVRAMSize()
 	if vramSize == 0 {
 		return 0, errors.New("could not get metal VRAM size")
--- a/pkg/gpuinfo/memory_linux.go
+++ b/pkg/gpuinfo/memory_linux.go
@ -8,7 +8,7 @@ import "C"
 import "errors"

 // getVRAMSize returns total system GPU memory in bytes
-func getVRAMSize() (uint64, error) {
+func getVRAMSize(_ string) (uint64, error) {
 	vramSize := C.getVRAMSize()
 	if vramSize == 0 {
 		return 0, errors.New("could not get nvidia VRAM size")
--- a/pkg/gpuinfo/memory_windows.go
+++ b/pkg/gpuinfo/memory_windows.go
@ -1,16 +1,31 @@
 package gpuinfo

-/*
-#include "nvapi.h"
-*/
-import "C"
-import "errors"
+import (
+	"bufio"
+	"context"
+	"errors"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"strings"
+)

 // getVRAMSize returns total system GPU memory in bytes
-func getVRAMSize() (uint64, error) {
-	vramSize := C.getVRAMSize()
-	if vramSize == 0 {
-		return 0, errors.New("could not get nvapi VRAM size")
+func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
+	nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
+	// Callers pass only the install path (as on darwin/linux), so use a background context here.
+	cmd := exec.CommandContext(context.Background(), nvGPUInfoBin)
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return 0, err
 	}
-	return uint64(vramSize), nil
+	sc := bufio.NewScanner(strings.NewReader(string(out)))
+	for sc.Scan() {
+		vram, found := strings.CutPrefix(sc.Text(), "GPU[0]: dedicated memory:")
+		if found {
+			vram = strings.TrimSpace(vram)
+			return strconv.ParseUint(vram, 10, 64)
+		}
+	}
+	return 0, errors.New("unexpected nv-gpu-info output format")
 }
--- a/pkg/gpuinfo/nvapi.c
+++ b/pkg/gpuinfo/nvapi.c
@ -1,80 +0,0 @@
-// +build windows
-
-#include "nvapi.h"
-
-typedef enum {
-    NVAPI_OK = 0
-} NvAPI_Status;
-
-typedef unsigned int NvU32;
-typedef struct {
-    NvU32 version;
-    NvU32 dedicatedVideoMemory;
-    NvU32 availableDedicatedVideoMemory;
-    NvU32 systemVideoMemory;
-    NvU32 sharedSystemMemory;
-} NV_DISPLAY_DRIVER_MEMORY_INFO;
-
-typedef void* NvPhysicalGpuHandle;
-
-#define NV_DISPLAY_DRIVER_MEMORY_INFO_VER 0x10028
-
-size_t getVRAMSize() {
-    HMODULE handle;
-    NvAPI_Status (*NvAPI_Initialize)(void);
-    NvAPI_Status (*NvAPI_EnumPhysicalGPUs)(NvPhysicalGpuHandle* handles, NvU32* count);
-    NvAPI_Status (*NvAPI_GPU_GetMemoryInfo)(NvPhysicalGpuHandle handle, NV_DISPLAY_DRIVER_MEMORY_INFO* memInfo);
-    NvAPI_Status (*NvAPI_Unload)(void);
-    
-    NvAPI_Status status;
-    NvPhysicalGpuHandle handles[64];
-    NvU32 count = 0;
-    NV_DISPLAY_DRIVER_MEMORY_INFO memInfo;
-    
-    // Try to load nvapi64.dll first, then fallback to nvapi.dll
-    handle = LoadLibraryA("nvapi64.dll");
-    if (!handle) {
-        handle = LoadLibraryA("nvapi.dll");
-        if (!handle) {
-            return 0;
-        }
-    }
-    
-    // Load required functions
-    NvAPI_Initialize = (NvAPI_Status(*)(void))GetProcAddress(handle, "NvAPI_Initialize");
-    NvAPI_EnumPhysicalGPUs = (NvAPI_Status(*)(NvPhysicalGpuHandle*, NvU32*))GetProcAddress(handle, "NvAPI_EnumPhysicalGPUs");
-    NvAPI_GPU_GetMemoryInfo = (NvAPI_Status(*)(NvPhysicalGpuHandle, NV_DISPLAY_DRIVER_MEMORY_INFO*))GetProcAddress(handle, "NvAPI_GPU_GetMemoryInfo");
-    NvAPI_Unload = (NvAPI_Status(*)(void))GetProcAddress(handle, "NvAPI_Unload");
-    
-    if (!NvAPI_Initialize || !NvAPI_EnumPhysicalGPUs || !NvAPI_GPU_GetMemoryInfo || !NvAPI_Unload) {
-        FreeLibrary(handle);
-        return 0;
-    }
-    
-    status = NvAPI_Initialize();
-    if (status != NVAPI_OK) {
-        FreeLibrary(handle);
-        return 0;
-    }
-    
-    status = NvAPI_EnumPhysicalGPUs(handles, &count);
-    if (status != NVAPI_OK || count == 0) {
-        NvAPI_Unload();
-        FreeLibrary(handle);
-        return 0;
-    }
-    
-    memInfo.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
-    status = NvAPI_GPU_GetMemoryInfo(handles[0], &memInfo);
-    if (status != NVAPI_OK) {
-        NvAPI_Unload();
-        FreeLibrary(handle);
-        return 0;
-    }
-    
-    NvAPI_Unload();
-    FreeLibrary(handle);
-    
-    // Return dedicated video memory in bytes (convert from KB)
-    return (size_t)memInfo.dedicatedVideoMemory * 1024;
-}
--- a/pkg/gpuinfo/nvapi.h
+++ b/pkg/gpuinfo/nvapi.h
@ -1,6 +0,0 @@
-// +build windows
-
-#include <stddef.h>
-#include <windows.h>
-
-size_t getVRAMSize();