Force a re-installation if EnableInferenceGPUVariant has changed

Signed-off-by: Dorin Geman <dorin.geman@docker.com>
This commit is contained in:
Author: Dorin Geman, 2025-04-16 23:46:20 +03:00 — committed by Piotr Stankiewicz
parent 5d56ba5ad3
commit a3fb86a0bb
3 changed files with 13 additions and 3 deletions

View File

@ -23,6 +23,8 @@ const (
hubRepo = "docker-model-backend-llamacpp"
)
// ShouldUseGPUVariant reports whether the GPU build of the llama.cpp backend
// should be considered; when set, ensureLatestLlamaCpp probes for a CUDA 11
// capable GPU before choosing a variant.
// NOTE(review): package-level mutable state — presumably written once at
// startup from the EnableInferenceGPUVariant setting; confirm it is not
// mutated concurrently with installation.
var ShouldUseGPUVariant bool
func downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string,
) error {

View File

@ -13,9 +13,13 @@ func ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *h
llamaCppPath, vendoredServerStoragePath string,
) error {
nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe")
canUseCUDA11, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
if err != nil {
return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
var canUseCUDA11 bool
var err error
if ShouldUseGPUVariant {
canUseCUDA11, err = hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
if err != nil {
return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
}
}
desiredVersion := "latest"
desiredVariant := "cpu"

View File

@ -196,6 +196,10 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request
runner.ServeHTTP(w, upstreamRequest)
}
// ResetInstaller replaces the scheduler's installer with a freshly constructed
// one using the provided HTTP client, forcing a re-installation pass (e.g.
// after EnableInferenceGPUVariant has changed).
// NOTE(review): s.installer is reassigned without any visible locking here —
// confirm callers only invoke this while no install is in flight.
func (s *Scheduler) ResetInstaller(httpClient *http.Client) {
s.installer = newInstaller(s.log, s.backends, httpClient)
}
// ServeHTTP implements net/http.Handler.ServeHTTP.
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
s.router.ServeHTTP(w, r)