From 3d8c73c355da338b31658b802baed176ac3819a5 Mon Sep 17 00:00:00 2001
From: Jacob Howard
Date: Mon, 28 Apr 2025 12:13:37 -0600
Subject: [PATCH] [AIE-151] native: support dynamic detection of OpenCL

Signed-off-by: Jacob Howard
---
 .../backends/llamacpp/download_windows.go | 12 ++++++---
 .../backends/llamacpp/gpuinfo_windows.go  | 27 +++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/pkg/inference/backends/llamacpp/download_windows.go b/pkg/inference/backends/llamacpp/download_windows.go
index 0dccce6..fd5cb9c 100644
--- a/pkg/inference/backends/llamacpp/download_windows.go
+++ b/pkg/inference/backends/llamacpp/download_windows.go
@@ -14,7 +14,7 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
 	llamaCppPath, vendoredServerStoragePath string,
 ) error {
 	nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe")
-	var canUseCUDA11 bool
+	var canUseCUDA11, canUseOpenCL bool
 	var err error
 	ShouldUseGPUVariantLock.Lock()
 	defer ShouldUseGPUVariantLock.Unlock()
@@ -25,15 +25,19 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
 				l.status = fmt.Sprintf("failed to check CUDA 11 capability: %v", err)
 				return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
 			}
+		} else if runtime.GOARCH == "arm64" {
+			canUseOpenCL, err = hasOpenCL()
+			if err != nil {
+				l.status = fmt.Sprintf("failed to check OpenCL capability: %v", err)
+				return fmt.Errorf("failed to check OpenCL capability: %w", err)
+			}
 		}
 	}
 	desiredVersion := "latest"
 	desiredVariant := "cpu"
 	if canUseCUDA11 {
 		desiredVariant = "cuda"
-	}
-	// TODO(p1-0tr): we should auto-detect if we can use opencl, but for now assume that we can
-	if runtime.GOARCH == "arm64" {
+	} else if canUseOpenCL {
 		desiredVariant = "opencl"
 	}
 	l.status = fmt.Sprintf("looking for updates for %s variant", desiredVariant)
diff --git a/pkg/inference/backends/llamacpp/gpuinfo_windows.go b/pkg/inference/backends/llamacpp/gpuinfo_windows.go
index b19fc41..e5e392d 100644
--- a/pkg/inference/backends/llamacpp/gpuinfo_windows.go
+++ b/pkg/inference/backends/llamacpp/gpuinfo_windows.go
@@ -3,10 +3,12 @@ package llamacpp
 import (
 	"bufio"
 	"context"
+	"errors"
 	"fmt"
 	"os/exec"
 	"strconv"
 	"strings"
+	"syscall"
 
 	"github.com/jaypipes/ghw"
 )
@@ -56,6 +58,27 @@ func hasCUDA11CapableGPU(ctx context.Context, nvGPUInfoBin string) (bool, error)
 	return false, nil
 }
 
-func CanUseGPU(ctx context.Context, nvGPUInfoBin string) (bool, error) {
-	return hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
+func hasOpenCL() (bool, error) {
+	opencl, err := syscall.LoadLibrary("OpenCL.dll")
+	if err != nil {
+		if errors.Is(err, syscall.ERROR_MOD_NOT_FOUND) {
+			return false, nil
+		}
+		return false, fmt.Errorf("unable to load OpenCL DLL: %w", err)
+	}
+	// We could perform additional platform and device version checks here (if
+	// we scaffold out the relevant OpenCL API datatypes in Go), but since users
+	// can opt-out of GPU support, we can probably skip that and just let users
+	// disable it if things don't work. Alternatively, we could inspect the GPUs
+	// found by the ghw package, if it supports (e.g.) Adreno GPUs.
+	syscall.FreeLibrary(opencl)
+	return true, nil
+}
+
+func CanUseGPU(ctx context.Context, nvGPUInfoBin string) (bool, error) {
+	haveCUDA11GPU, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
+	if haveCUDA11GPU || err != nil {
+		return haveCUDA11GPU, err
+	}
+	return hasOpenCL()
 }
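
Note on the version-check idea raised in hasOpenCL()'s comment: loading OpenCL.dll
only proves an ICD loader is present, not that any driver actually registers a
usable platform. If we ever want the stricter check, the sketch below shows one
way it could look. This is not part of the patch: the helper name
openCLPlatformVersions is invented here, while clGetPlatformIDs,
clGetPlatformInfo, and CL_PLATFORM_VERSION (0x0901) are the standard OpenCL C
API; the Go plumbing is an untested assumption.

	//go:build windows

	package llamacpp

	import (
		"fmt"
		"strings"
		"syscall"
		"unsafe"
	)

	// CL_PLATFORM_VERSION from the OpenCL headers.
	const clPlatformVersion = 0x0901

	// openCLPlatformVersions is a hypothetical helper returning the version
	// string of every OpenCL platform (e.g. "OpenCL 3.0 ..."). A caller could
	// parse these to gate the "opencl" variant on a minimum platform version.
	func openCLPlatformVersions() ([]string, error) {
		dll := syscall.NewLazyDLL("OpenCL.dll")
		getIDs := dll.NewProc("clGetPlatformIDs")
		getInfo := dll.NewProc("clGetPlatformInfo")
		// Resolve the procs explicitly so a missing export returns an error
		// instead of panicking inside Call.
		if err := getIDs.Find(); err != nil {
			return nil, err
		}
		if err := getInfo.Find(); err != nil {
			return nil, err
		}

		// First call: ask only for the platform count.
		var count uint32
		if ret, _, _ := getIDs.Call(0, 0, uintptr(unsafe.Pointer(&count))); ret != 0 || count == 0 {
			return nil, fmt.Errorf("clGetPlatformIDs: no usable platforms (code %d)", int32(ret))
		}

		// Second call: fetch the opaque platform handles.
		platforms := make([]uintptr, count)
		if ret, _, _ := getIDs.Call(uintptr(count), uintptr(unsafe.Pointer(&platforms[0])), 0); ret != 0 {
			return nil, fmt.Errorf("clGetPlatformIDs failed (code %d)", int32(ret))
		}

		versions := make([]string, 0, count)
		for _, p := range platforms {
			buf := make([]byte, 256)
			var n uintptr
			ret, _, _ := getInfo.Call(p, clPlatformVersion, uintptr(len(buf)),
				uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&n)))
			if ret != 0 {
				continue // skip platforms that refuse the query
			}
			// The reported size includes the trailing NUL; trim it off.
			versions = append(versions, strings.TrimRight(string(buf[:n]), "\x00"))
		}
		return versions, nil
	}

If something like this were adopted, hasOpenCL() could call it and treat an
empty result as "no usable OpenCL", which would catch machines where the ICD
loader is installed but no driver registers a platform.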