Use nv-gpu-info on Windows to get VRAM size
Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
parent
6e096b2caa
commit
ea3bb71830
2
main.go
2
main.go
|
|
@ -90,7 +90,7 @@ func main() {
|
|||
log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
|
||||
}
|
||||
|
||||
gpuInfo := gpuinfo.New()
|
||||
gpuInfo := gpuinfo.New(llamaServerPath)
|
||||
|
||||
scheduler := scheduling.NewScheduler(
|
||||
log,
|
||||
|
|
|
|||
|
|
@ -1,11 +1,17 @@
|
|||
package gpuinfo
|
||||
|
||||
type GPUInfo struct{}
|
||||
type GPUInfo struct {
|
||||
// modelRuntimeInstallPath is the location where DMR installed its llama-server
|
||||
// and accompanying tools
|
||||
modelRuntimeInstallPath string
|
||||
}
|
||||
|
||||
func New() *GPUInfo {
|
||||
return &GPUInfo{}
|
||||
func New(modelRuntimeInstallPath string) *GPUInfo {
|
||||
return &GPUInfo{
|
||||
modelRuntimeInstallPath: modelRuntimeInstallPath,
|
||||
}
|
||||
}
|
||||
|
||||
func (g *GPUInfo) GetVRAMSize() (uint64, error) {
|
||||
return getVRAMSize()
|
||||
return getVRAMSize(g.modelRuntimeInstallPath)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import "C"
|
|||
import "errors"
|
||||
|
||||
// getVRAMSize returns total system GPU memory in bytes
|
||||
func getVRAMSize() (uint64, error) {
|
||||
func getVRAMSize(_ string) (uint64, error) {
|
||||
vramSize := C.getVRAMSize()
|
||||
if vramSize == 0 {
|
||||
return 0, errors.New("could not get metal VRAM size")
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import "C"
|
|||
import "errors"
|
||||
|
||||
// getVRAMSize returns total system GPU memory in bytes
|
||||
func getVRAMSize() (uint64, error) {
|
||||
func getVRAMSize(_ string) (uint64, error) {
|
||||
vramSize := C.getVRAMSize()
|
||||
if vramSize == 0 {
|
||||
return 0, errors.New("could not get nvidia VRAM size")
|
||||
|
|
|
|||
|
|
@ -1,16 +1,31 @@
|
|||
package gpuinfo
|
||||
|
||||
/*
|
||||
#include "nvapi.h"
|
||||
*/
|
||||
import "C"
|
||||
import "errors"
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"errors"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// getVRAMSize returns total system GPU memory in bytes
|
||||
func getVRAMSize() (uint64, error) {
|
||||
vramSize := C.getVRAMSize()
|
||||
if vramSize == 0 {
|
||||
return 0, errors.New("could not get nvapi VRAM size")
|
||||
func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, error) {
|
||||
nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
|
||||
|
||||
cmd := exec.CommandContext(ctx, nvGPUInfoBin)
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return uint64(vramSize), nil
|
||||
sc := bufio.NewScanner(strings.NewReader(string(out)))
|
||||
for sc.Scan() {
|
||||
vram, found := strings.CutPrefix(sc.Text(), "GPU[0]: dedicated memory:")
|
||||
if found {
|
||||
vram = strings.TrimSpace(vram)
|
||||
return strconv.ParseUint(vram, 10, 64)
|
||||
}
|
||||
}
|
||||
return 0, errors.New("unexpected nv-gpu-info output format")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,80 +0,0 @@
|
|||
// +build windows
|
||||
|
||||
#include "nvapi.h"
|
||||
|
||||
typedef enum {
|
||||
NVAPI_OK = 0
|
||||
} NvAPI_Status;
|
||||
|
||||
typedef unsigned int NvU32;
|
||||
typedef struct {
|
||||
NvU32 version;
|
||||
NvU32 dedicatedVideoMemory;
|
||||
NvU32 availableDedicatedVideoMemory;
|
||||
NvU32 systemVideoMemory;
|
||||
NvU32 sharedSystemMemory;
|
||||
} NV_DISPLAY_DRIVER_MEMORY_INFO;
|
||||
|
||||
typedef void* NvPhysicalGpuHandle;
|
||||
|
||||
#define NV_DISPLAY_DRIVER_MEMORY_INFO_VER 0x10028
|
||||
|
||||
size_t getVRAMSize() {
|
||||
HMODULE handle;
|
||||
NvAPI_Status (*NvAPI_Initialize)(void);
|
||||
NvAPI_Status (*NvAPI_EnumPhysicalGPUs)(NvPhysicalGpuHandle* handles, NvU32* count);
|
||||
NvAPI_Status (*NvAPI_GPU_GetMemoryInfo)(NvPhysicalGpuHandle handle, NV_DISPLAY_DRIVER_MEMORY_INFO* memInfo);
|
||||
NvAPI_Status (*NvAPI_Unload)(void);
|
||||
|
||||
NvAPI_Status status;
|
||||
NvPhysicalGpuHandle handles[64];
|
||||
NvU32 count = 0;
|
||||
NV_DISPLAY_DRIVER_MEMORY_INFO memInfo;
|
||||
|
||||
// Try to load nvapi64.dll first, then fall back to nvapi.dll
|
||||
handle = LoadLibraryA("nvapi64.dll");
|
||||
if (!handle) {
|
||||
handle = LoadLibraryA("nvapi.dll");
|
||||
if (!handle) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Load required functions
|
||||
NvAPI_Initialize = (NvAPI_Status(*)(void))GetProcAddress(handle, "NvAPI_Initialize");
|
||||
NvAPI_EnumPhysicalGPUs = (NvAPI_Status(*)(NvPhysicalGpuHandle*, NvU32*))GetProcAddress(handle, "NvAPI_EnumPhysicalGPUs");
|
||||
NvAPI_GPU_GetMemoryInfo = (NvAPI_Status(*)(NvPhysicalGpuHandle, NV_DISPLAY_DRIVER_MEMORY_INFO*))GetProcAddress(handle, "NvAPI_GPU_GetMemoryInfo");
|
||||
NvAPI_Unload = (NvAPI_Status(*)(void))GetProcAddress(handle, "NvAPI_Unload");
|
||||
|
||||
if (!NvAPI_Initialize || !NvAPI_EnumPhysicalGPUs || !NvAPI_GPU_GetMemoryInfo || !NvAPI_Unload) {
|
||||
FreeLibrary(handle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
status = NvAPI_Initialize();
|
||||
if (status != NVAPI_OK) {
|
||||
FreeLibrary(handle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
status = NvAPI_EnumPhysicalGPUs(handles, &count);
|
||||
if (status != NVAPI_OK || count == 0) {
|
||||
NvAPI_Unload();
|
||||
FreeLibrary(handle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
memInfo.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
|
||||
status = NvAPI_GPU_GetMemoryInfo(handles[0], &memInfo);
|
||||
if (status != NVAPI_OK) {
|
||||
NvAPI_Unload();
|
||||
FreeLibrary(handle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
NvAPI_Unload();
|
||||
FreeLibrary(handle);
|
||||
|
||||
// Return dedicated video memory in bytes (convert from KB)
|
||||
return (size_t)memInfo.dedicatedVideoMemory * 1024;
|
||||
}
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
// +build windows
|
||||
|
||||
#include <stddef.h>
|
||||
#include <windows.h>
|
||||
|
||||
size_t getVRAMSize();
|
||||
Loading…
Reference in New Issue