Force a re-installation if EnableInferenceGPUVariant has changed

Signed-off-by: Dorin Geman <dorin.geman@docker.com>
This commit is contained in:
Dorin Geman 2025-04-16 23:46:20 +03:00 committed by Piotr Stankiewicz
parent 5d56ba5ad3
commit a3fb86a0bb
3 changed files with 13 additions and 3 deletions

View File

@ -23,6 +23,8 @@ const (
hubRepo = "docker-model-backend-llamacpp" hubRepo = "docker-model-backend-llamacpp"
) )
// ShouldUseGPUVariant is a package-level switch read by ensureLatestLlamaCpp:
// the CUDA 11 capability probe (hasCUDA11CapableGPU) runs only when it is
// true. Its zero value is false, in which case the probe is skipped.
//
// NOTE(review): this is mutable package-level state with no synchronization
// visible here — confirm which caller sets it and that it is written before
// ensureLatestLlamaCpp can run concurrently.
var ShouldUseGPUVariant bool
func downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client, func downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string, llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string,
) error { ) error {

View File

@ -13,10 +13,14 @@ func ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *h
llamaCppPath, vendoredServerStoragePath string, llamaCppPath, vendoredServerStoragePath string,
) error { ) error {
nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe") nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe")
canUseCUDA11, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin) var canUseCUDA11 bool
var err error
if ShouldUseGPUVariant {
canUseCUDA11, err = hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
if err != nil { if err != nil {
return fmt.Errorf("failed to check CUDA 11 capability: %w", err) return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
} }
}
desiredVersion := "latest" desiredVersion := "latest"
desiredVariant := "cpu" desiredVariant := "cpu"
if canUseCUDA11 { if canUseCUDA11 {

View File

@ -196,6 +196,10 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request
runner.ServeHTTP(w, upstreamRequest) runner.ServeHTTP(w, upstreamRequest)
} }
// ResetInstaller discards the scheduler's current installer and replaces it
// with a new one built from the scheduler's logger, its backends, and the
// supplied HTTP client.
//
// NOTE(review): the field is reassigned without any locking in this method —
// confirm that callers serialize this with anything else that reads
// s.installer.
func (s *Scheduler) ResetInstaller(httpClient *http.Client) {
	fresh := newInstaller(s.log, s.backends, httpClient)
	s.installer = fresh
}
// ServeHTTP implements net/http.Handler.ServeHTTP. // ServeHTTP implements net/http.Handler.ServeHTTP.
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
s.router.ServeHTTP(w, r) s.router.ServeHTTP(w, r)