Force a re-installation if EnableInferenceGPUVariant has changed
Signed-off-by: Dorin Geman <dorin.geman@docker.com>
This commit is contained in:
parent
5d56ba5ad3
commit
a3fb86a0bb
|
|
@ -23,6 +23,8 @@ const (
|
||||||
hubRepo = "docker-model-backend-llamacpp"
|
hubRepo = "docker-model-backend-llamacpp"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ShouldUseGPUVariant gates the CUDA 11 capability probe performed by
// ensureLatestLlamaCpp: the probe (hasCUDA11CapableGPU) only runs when this
// is true; otherwise canUseCUDA11 keeps its zero value (false).
// NOTE(review): this is mutable package-level state; presumably it mirrors the
// EnableInferenceGPUVariant setting named in the commit title — confirm who
// writes it and whether writes can race with an in-flight install.
var ShouldUseGPUVariant bool
|
||||||
|
|
||||||
func downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
|
func downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
|
||||||
llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string,
|
llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string,
|
||||||
) error {
|
) error {
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,13 @@ func ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *h
|
||||||
llamaCppPath, vendoredServerStoragePath string,
|
llamaCppPath, vendoredServerStoragePath string,
|
||||||
) error {
|
) error {
|
||||||
nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe")
|
nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe")
|
||||||
canUseCUDA11, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
|
var canUseCUDA11 bool
|
||||||
if err != nil {
|
var err error
|
||||||
return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
|
if ShouldUseGPUVariant {
|
||||||
|
canUseCUDA11, err = hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
desiredVersion := "latest"
|
desiredVersion := "latest"
|
||||||
desiredVariant := "cpu"
|
desiredVariant := "cpu"
|
||||||
|
|
|
||||||
|
|
@ -196,6 +196,10 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request
|
||||||
runner.ServeHTTP(w, upstreamRequest)
|
runner.ServeHTTP(w, upstreamRequest)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ResetInstaller replaces the scheduler's installer with a fresh one built
// from the scheduler's logger, its backends, and the supplied HTTP client.
// NOTE(review): presumably called to force a backend re-installation after the
// GPU-variant setting changes (per the commit title) — confirm against callers.
// NOTE(review): the assignment to s.installer is not guarded by any lock in
// the code visible here; verify it cannot race with concurrent users.
func (s *Scheduler) ResetInstaller(httpClient *http.Client) {
|
||||||
|
s.installer = newInstaller(s.log, s.backends, httpClient)
|
||||||
|
}
|
||||||
|
|
||||||
// ServeHTTP implements net/http.Handler.ServeHTTP.
|
// ServeHTTP implements net/http.Handler.ServeHTTP.
|
||||||
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||||
s.router.ServeHTTP(w, r)
|
s.router.ServeHTTP(w, r)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue