Use nv-gpu-info on Windows to get VRAM size

Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
Piotr Stankiewicz 2025-07-14 15:12:05 +02:00 committed by Piotr
parent 6e096b2caa
commit ea3bb71830
7 changed files with 38 additions and 103 deletions

View File

@ -90,7 +90,7 @@ func main() {
log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
}
gpuInfo := gpuinfo.New()
gpuInfo := gpuinfo.New(llamaServerPath)
scheduler := scheduling.NewScheduler(
log,

View File

@ -1,11 +1,17 @@
package gpuinfo
type GPUInfo struct{}
// GPUInfo holds the configuration that GetVRAMSize passes on to the
// getVRAMSize implementation.
type GPUInfo struct {
// modelRuntimeInstallPath is the location where DMR installed its llama-server
// and accompanying tools.
modelRuntimeInstallPath string
}
func New() *GPUInfo {
return &GPUInfo{}
// New allocates a GPUInfo and records modelRuntimeInstallPath on it so that
// later queries can hand the path to the getVRAMSize implementation.
func New(modelRuntimeInstallPath string) *GPUInfo {
	info := new(GPUInfo)
	info.modelRuntimeInstallPath = modelRuntimeInstallPath
	return info
}
// GetVRAMSize returns whatever getVRAMSize reports when given the model
// runtime install path this GPUInfo was created with. Both the size and the
// error are passed through unchanged.
func (g *GPUInfo) GetVRAMSize() (uint64, error) {
	return getVRAMSize(g.modelRuntimeInstallPath)
}

View File

@ -8,7 +8,7 @@ import "C"
import "errors"
// getVRAMSize returns total system GPU memory in bytes
func getVRAMSize() (uint64, error) {
func getVRAMSize(_ string) (uint64, error) {
vramSize := C.getVRAMSize()
if vramSize == 0 {
return 0, errors.New("could not get metal VRAM size")

View File

@ -8,7 +8,7 @@ import "C"
import "errors"
// getVRAMSize returns total system GPU memory in bytes
func getVRAMSize() (uint64, error) {
func getVRAMSize(_ string) (uint64, error) {
vramSize := C.getVRAMSize()
if vramSize == 0 {
return 0, errors.New("could not get nvidia VRAM size")

View File

@ -1,16 +1,31 @@
package gpuinfo
/*
#include "nvapi.h"
*/
import "C"
import "errors"
import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"time"
)
// getVRAMSize returns total system GPU memory in bytes.
//
// It runs the com.docker.nv-gpu-info.exe helper located in
// modelRuntimeInstallPath and returns the number printed on the
// "GPU[0]: dedicated memory:" line of its combined stdout/stderr output.
// The number is returned exactly as printed; the helper is presumed to
// report bytes (TODO confirm against the helper's output format).
//
// An error is returned when the helper cannot be run or exits unsuccessfully,
// when no matching line is present, or when the value is not an unsigned
// decimal integer. The size is always 0 when the error is non-nil.
func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
	// Bound the helper's run time so a hung process cannot block the caller
	// indefinitely.
	const helperTimeout = 30 * time.Second
	ctx, cancel := context.WithTimeout(context.Background(), helperTimeout)
	defer cancel()

	nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
	out, err := exec.CommandContext(ctx, nvGPUInfoBin).CombinedOutput()
	if err != nil {
		return 0, fmt.Errorf("running %s: %w", nvGPUInfoBin, err)
	}
	return parseNVGPUInfoOutput(string(out))
}

// parseNVGPUInfoOutput scans the helper output line by line and returns the
// value that follows the "GPU[0]: dedicated memory:" prefix on the first
// matching line.
func parseNVGPUInfoOutput(out string) (uint64, error) {
	const prefix = "GPU[0]: dedicated memory:"

	sc := bufio.NewScanner(strings.NewReader(out))
	for sc.Scan() {
		field, found := strings.CutPrefix(sc.Text(), prefix)
		if !found {
			continue
		}
		field = strings.TrimSpace(field)
		vram, err := strconv.ParseUint(field, 10, 64)
		if err != nil {
			// ParseUint may return a non-zero value on range errors; never
			// leak it alongside an error.
			return 0, fmt.Errorf("parsing nv-gpu-info dedicated memory %q: %w", field, err)
		}
		return vram, nil
	}
	if err := sc.Err(); err != nil {
		return 0, fmt.Errorf("reading nv-gpu-info output: %w", err)
	}
	return 0, errors.New("unexpected nv-gpu-info output format")
}

View File

@ -1,80 +0,0 @@
// +build windows
#include "nvapi.h"
// Local declarations used by getVRAMSize in this file. Presumably these mirror
// the definitions in NVIDIA's NVAPI SDK headers -- verify against the SDK.

// Status code returned by the NVAPI function pointers; only the success value
// is declared because the caller only ever compares against NVAPI_OK.
typedef enum {
NVAPI_OK = 0
} NvAPI_Status;
// Unsigned integer alias used for the GPU count and the memory-info fields.
typedef unsigned int NvU32;
// Memory information filled in through the NvAPI_GPU_GetMemoryInfo pointer.
// getVRAMSize sets `version` before the call and reads only
// `dedicatedVideoMemory`, which it treats as a kilobyte count.
typedef struct {
NvU32 version;
NvU32 dedicatedVideoMemory;
NvU32 availableDedicatedVideoMemory;
NvU32 systemVideoMemory;
NvU32 sharedSystemMemory;
} NV_DISPLAY_DRIVER_MEMORY_INFO;
// Opaque GPU handle as written by the NvAPI_EnumPhysicalGPUs pointer.
typedef void* NvPhysicalGpuHandle;
// Value stored in NV_DISPLAY_DRIVER_MEMORY_INFO.version before the query.
// NOTE(review): if this follows the usual NVAPI convention of carrying
// sizeof(struct) in the low 16 bits, 0x28 (40) does not match the five NvU32
// fields (20 bytes) declared above -- confirm against the SDK.
#define NV_DISPLAY_DRIVER_MEMORY_INFO_VER 0x10028
// getVRAMSize returns the dedicated video memory of the first enumerated
// physical GPU, in bytes.
//
// Returns 0 on every failure path: neither DLL can be loaded, a required
// symbol is missing, initialization or enumeration fails, no GPU is
// enumerated, or the memory query fails. The library is freed (and NVAPI
// unloaded once it has been initialized) on every path after a successful load.
size_t getVRAMSize() {
HMODULE handle;
// Function pointers resolved at run time from the loaded DLL.
NvAPI_Status (*NvAPI_Initialize)(void);
NvAPI_Status (*NvAPI_EnumPhysicalGPUs)(NvPhysicalGpuHandle* handles, NvU32* count);
NvAPI_Status (*NvAPI_GPU_GetMemoryInfo)(NvPhysicalGpuHandle handle, NV_DISPLAY_DRIVER_MEMORY_INFO* memInfo);
NvAPI_Status (*NvAPI_Unload)(void);
NvAPI_Status status;
// Fixed-size buffer for enumerated handles; presumably 64 matches the NVAPI
// maximum physical GPU count -- TODO confirm.
NvPhysicalGpuHandle handles[64];
NvU32 count = 0;
NV_DISPLAY_DRIVER_MEMORY_INFO memInfo;
// Try to load nvapi64.dll first, then fallback to nvapi.dll
handle = LoadLibraryA("nvapi64.dll");
if (!handle) {
handle = LoadLibraryA("nvapi.dll");
if (!handle) {
return 0;
}
}
// Load required functions
// NOTE(review): these are looked up by name with GetProcAddress; confirm the
// DLL actually exports these symbols under these names.
NvAPI_Initialize = (NvAPI_Status(*)(void))GetProcAddress(handle, "NvAPI_Initialize");
NvAPI_EnumPhysicalGPUs = (NvAPI_Status(*)(NvPhysicalGpuHandle*, NvU32*))GetProcAddress(handle, "NvAPI_EnumPhysicalGPUs");
NvAPI_GPU_GetMemoryInfo = (NvAPI_Status(*)(NvPhysicalGpuHandle, NV_DISPLAY_DRIVER_MEMORY_INFO*))GetProcAddress(handle, "NvAPI_GPU_GetMemoryInfo");
NvAPI_Unload = (NvAPI_Status(*)(void))GetProcAddress(handle, "NvAPI_Unload");
// All four entry points are required; bail out if any lookup failed.
if (!NvAPI_Initialize || !NvAPI_EnumPhysicalGPUs || !NvAPI_GPU_GetMemoryInfo || !NvAPI_Unload) {
FreeLibrary(handle);
return 0;
}
status = NvAPI_Initialize();
if (status != NVAPI_OK) {
// Initialization failed, so only the library handle needs releasing.
FreeLibrary(handle);
return 0;
}
status = NvAPI_EnumPhysicalGPUs(handles, &count);
if (status != NVAPI_OK || count == 0) {
NvAPI_Unload();
FreeLibrary(handle);
return 0;
}
// Only the first enumerated GPU is queried; any others are ignored.
memInfo.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
status = NvAPI_GPU_GetMemoryInfo(handles[0], &memInfo);
if (status != NVAPI_OK) {
NvAPI_Unload();
FreeLibrary(handle);
return 0;
}
NvAPI_Unload();
FreeLibrary(handle);
// Return dedicated video memory in bytes (convert from KB)
// The cast to size_t happens before the multiplication, so the product is
// computed in size_t rather than in NvU32.
return (size_t)memInfo.dedicatedVideoMemory * 1024;
}

View File

@ -1,6 +0,0 @@
// +build windows
#include <stddef.h>
#include <windows.h>
// getVRAMSize returns the dedicated video memory of the first enumerated
// physical GPU in bytes, or 0 when it cannot be determined.
size_t getVRAMSize();