Force a re-installation if EnableInferenceGPUVariant has changed
Signed-off-by: Dorin Geman <dorin.geman@docker.com>
This commit is contained in:
parent
5d56ba5ad3
commit
a3fb86a0bb
|
|
@ -23,6 +23,8 @@ const (
|
|||
hubRepo = "docker-model-backend-llamacpp"
|
||||
)
|
||||
|
||||
// ShouldUseGPUVariant gates GPU detection: ensureLatestLlamaCpp calls
// hasCUDA11CapableGPU only when it is true, and otherwise skips the probe.
// NOTE(review): presumably set from the EnableInferenceGPUVariant setting
// before installation runs — confirm against the caller.
var ShouldUseGPUVariant bool
|
||||
|
||||
func downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
|
||||
llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string,
|
||||
) error {
|
||||
|
|
|
|||
|
|
@ -13,9 +13,13 @@ func ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *h
|
|||
llamaCppPath, vendoredServerStoragePath string,
|
||||
) error {
|
||||
nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe")
|
||||
canUseCUDA11, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
|
||||
var canUseCUDA11 bool
|
||||
var err error
|
||||
if ShouldUseGPUVariant {
|
||||
canUseCUDA11, err = hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
|
||||
}
|
||||
}
|
||||
desiredVersion := "latest"
|
||||
desiredVariant := "cpu"
|
||||
|
|
|
|||
|
|
@ -196,6 +196,10 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request
|
|||
runner.ServeHTTP(w, upstreamRequest)
|
||||
}
|
||||
|
||||
// ResetInstaller replaces the scheduler's installer with a new one built by
// newInstaller from the scheduler's logger, its backends, and the supplied
// HTTP client.
// NOTE(review): the assignment below is not guarded by any lock visible in
// this hunk — confirm callers cannot race with in-flight use of s.installer.
func (s *Scheduler) ResetInstaller(httpClient *http.Client) {
|
||||
s.installer = newInstaller(s.log, s.backends, httpClient)
|
||||
}
|
||||
|
||||
// ServeHTTP implements net/http.Handler.ServeHTTP.
|
||||
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
s.router.ServeHTTP(w, r)
|
||||
|
|
|
|||
Loading…
Reference in New Issue