VRAM size getter for linux
Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
parent
f90e4703f5
commit
d559e1b755
|
|
@ -27,7 +27,7 @@ COPY --link . .
|
|||
# Build the Go binary (cgo-enabled; links libc/libdl dynamically)
|
||||
RUN --mount=type=cache,target=/go/pkg/mod \
|
||||
--mount=type=cache,target=/root/.cache/go-build \
|
||||
CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
|
||||
CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
|
||||
|
||||
# --- Get llama.cpp binary ---
|
||||
FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ func newLoader(
|
|||
// Compute the amount of available memory.
|
||||
vramSize, err := getVRAMSize() // FIXME(p1-0tr): only implemented on macOS for now
|
||||
if err != nil {
|
||||
return nil // FIXME(p1-0tr): should forward the error
|
||||
log.Warnf("Could not read VRAM size: %s", err)
|
||||
}
|
||||
totalMemory := vramSize
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
package scheduling

/*
#cgo LDFLAGS: -ldl
#include "nvidia.h"
*/
import "C"
import "errors"

// getVRAMSize returns total system GPU memory in bytes.
// The C helper reports 0 on any failure (driver missing, NVML call
// failed), which is mapped to an error here.
func getVRAMSize() (uint64, error) {
	if total := C.getVRAMSize(); total != 0 {
		return uint64(total), nil
	}
	return 0, errors.New("could not get nvidia VRAM size")
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
// +build linux

#include "nvidia.h"

// Minimal local declarations of the NVML types we need, so the NVIDIA
// SDK headers are not required at build time. Layouts must match the
// driver's ABI (nvmlMemory_t is three unsigned long long fields).
typedef enum {
    NVML_SUCCESS = 0
} nvmlReturn_t;

typedef struct {
    unsigned long long total;
    unsigned long long free;
    unsigned long long used;
} nvmlMemory_t;

typedef void* nvmlDevice_t;

// getVRAMSize returns the total memory, in bytes, of GPU 0 as reported
// by NVML, or 0 if the NVIDIA driver library cannot be loaded or any
// NVML call fails. NOTE(review): only device index 0 is queried;
// multi-GPU totals are not summed — confirm that is the intent.
size_t getVRAMSize(void) {
    void* handle;
    nvmlReturn_t (*nvmlInit)(void);
    nvmlReturn_t (*nvmlShutdown)(void);
    nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(unsigned int index, nvmlDevice_t* device);
    nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t device, nvmlMemory_t* memory);

    nvmlReturn_t result;
    nvmlDevice_t device;
    nvmlMemory_t memory;

    // Try to load libnvidia-ml.so.1 first, then fallback to libnvidia-ml.so
    handle = dlopen("libnvidia-ml.so.1", RTLD_LAZY);
    if (!handle) {
        handle = dlopen("libnvidia-ml.so", RTLD_LAZY);
        if (!handle) {
            return 0;
        }
    }

    // Load required functions. ISO C does not allow implicitly
    // converting the void* returned by dlsym to a function pointer, so
    // cast explicitly (the idiom POSIX documents for dlsym).
    nvmlInit = (nvmlReturn_t (*)(void))dlsym(handle, "nvmlInit");
    nvmlShutdown = (nvmlReturn_t (*)(void))dlsym(handle, "nvmlShutdown");
    nvmlDeviceGetHandleByIndex =
        (nvmlReturn_t (*)(unsigned int, nvmlDevice_t*))dlsym(handle, "nvmlDeviceGetHandleByIndex");
    nvmlDeviceGetMemoryInfo =
        (nvmlReturn_t (*)(nvmlDevice_t, nvmlMemory_t*))dlsym(handle, "nvmlDeviceGetMemoryInfo");

    if (!nvmlInit || !nvmlShutdown || !nvmlDeviceGetHandleByIndex || !nvmlDeviceGetMemoryInfo) {
        dlclose(handle);
        return 0;
    }

    result = nvmlInit();
    if (result != NVML_SUCCESS) {
        dlclose(handle);
        return 0;
    }

    result = nvmlDeviceGetHandleByIndex(0, &device);
    if (result != NVML_SUCCESS) {
        nvmlShutdown();
        dlclose(handle);
        return 0;
    }

    result = nvmlDeviceGetMemoryInfo(device, &memory);
    if (result != NVML_SUCCESS) {
        nvmlShutdown();
        dlclose(handle);
        return 0;
    }

    nvmlShutdown();
    dlclose(handle);
    return memory.total;
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
// +build linux

#ifndef NVIDIA_H
#define NVIDIA_H

#include <stddef.h>
#include <dlfcn.h>

// getVRAMSize returns the total GPU memory in bytes reported by the
// NVIDIA driver, or 0 if the driver is unavailable or the query fails.
size_t getVRAMSize(void);

#endif // NVIDIA_H
|
||||
Loading…
Reference in New Issue