VRAM size getter for linux

Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
Piotr Stankiewicz 2025-07-11 14:44:23 +02:00
parent f90e4703f5
commit d559e1b755
5 changed files with 96 additions and 2 deletions

View File

@ -27,7 +27,7 @@ COPY --link . .
# Build the Go binary (static build)
RUN --mount=type=cache,target=/go/pkg/mod \
--mount=type=cache,target=/root/.cache/go-build \
CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
# --- Get llama.cpp binary ---
FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server

View File

@ -134,7 +134,7 @@ func newLoader(
// Compute the amount of available memory.
vramSize, err := getVRAMSize() // FIXME(p1-0tr): only implemented on macOS for now
if err != nil {
return nil // FIXME(p1-0tr): should forward the error
log.Warnf("Could not read VRAM size: %s", err)
}
totalMemory := vramSize

View File

@ -0,0 +1,17 @@
package scheduling
/*
#cgo LDFLAGS: -ldl
#include "nvidia.h"
*/
import "C"
import "errors"
// getVRAMSize reports the total GPU memory, in bytes, as returned by the C
// helper of the same name declared in nvidia.h.
//
// The C helper signals every failure by returning 0, so a zero result is
// translated into an error here; no further detail about the cause is
// available on the Go side.
func getVRAMSize() (uint64, error) {
	if total := uint64(C.getVRAMSize()); total != 0 {
		return total, nil
	}
	return 0, errors.New("could not get nvidia VRAM size")
}

View File

@ -0,0 +1,71 @@
// +build linux
#include "nvidia.h"
// Local re-declarations of the few NVML types this file needs. No NVML header
// is included here; the library is loaded at runtime with dlopen/dlsym instead.
// NOTE(review): these must stay layout-compatible with the driver's nvml.h —
// confirm against the NVML version being targeted.

// Status code returned by the NVML entry points. Only the success value is
// declared because it is the only one this file compares against.
typedef enum {
NVML_SUCCESS = 0
} nvmlReturn_t;
// Memory figures filled in by nvmlDeviceGetMemoryInfo. Only `total` is read
// by getVRAMSize. Presumably all three fields are byte counts — verify against
// the NVML documentation.
typedef struct {
unsigned long long total;
unsigned long long free;
unsigned long long used;
} nvmlMemory_t;
// Opaque device handle obtained from nvmlDeviceGetHandleByIndex; never
// dereferenced in this file.
typedef void* nvmlDevice_t;
size_t getVRAMSize() {
void* handle;
nvmlReturn_t (*nvmlInit)(void);
nvmlReturn_t (*nvmlShutdown)(void);
nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(unsigned int index, nvmlDevice_t* device);
nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t device, nvmlMemory_t* memory);
nvmlReturn_t result;
nvmlDevice_t device;
nvmlMemory_t memory;
// Try to load libnvidia-ml.so.1 first, then fallback to libnvidia-ml.so
handle = dlopen("libnvidia-ml.so.1", RTLD_LAZY);
if (!handle) {
handle = dlopen("libnvidia-ml.so", RTLD_LAZY);
if (!handle) {
return 0;
}
}
// Load required functions
nvmlInit = dlsym(handle, "nvmlInit");
nvmlShutdown = dlsym(handle, "nvmlShutdown");
nvmlDeviceGetHandleByIndex = dlsym(handle, "nvmlDeviceGetHandleByIndex");
nvmlDeviceGetMemoryInfo = dlsym(handle, "nvmlDeviceGetMemoryInfo");
if (!nvmlInit || !nvmlShutdown || !nvmlDeviceGetHandleByIndex || !nvmlDeviceGetMemoryInfo) {
dlclose(handle);
return 0;
}
result = nvmlInit();
if (result != NVML_SUCCESS) {
dlclose(handle);
return 0;
}
result = nvmlDeviceGetHandleByIndex(0, &device);
if (result != NVML_SUCCESS) {
nvmlShutdown();
dlclose(handle);
return 0;
}
result = nvmlDeviceGetMemoryInfo(device, &memory);
if (result != NVML_SUCCESS) {
nvmlShutdown();
dlclose(handle);
return 0;
}
nvmlShutdown();
dlclose(handle);
return memory.total;
}

View File

@ -0,0 +1,6 @@
// +build linux
#include <stddef.h>
#include <dlfcn.h>
// getVRAMSize returns the total GPU memory reported for the first device, or
// 0 when the value cannot be determined. Callers must treat 0 as failure.
//
// Declared with an explicit (void) parameter list so that this is a real
// prototype: before C23, empty parentheses leave the parameters unspecified
// and disable argument checking at call sites.
size_t getVRAMSize(void);