inference: Support disabling pre-pull memory checks

Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
Piotr Stankiewicz 2025-08-19 16:20:04 +02:00 committed by Piotr
parent 15e31feb30
commit 64c85dcd83
2 changed files with 16 additions and 10 deletions

View File

@ -14,6 +14,9 @@ import (
type ModelCreateRequest struct {
// From is the name of the model to pull.
From string `json:"from"`
// IgnoreRuntimeMemoryCheck, when true, tells the server to skip the pre-pull
// check of whether it has sufficient memory to run the given model (assuming
// default configuration). When false (the zero value, used when the field is
// absent from the request), the check is performed before pulling.
IgnoreRuntimeMemoryCheck bool `json:"ignore-runtime-memory-check,omitempty"`
}
// ToOpenAIList converts the model list to its OpenAI API representation. This function never

View File

@ -168,16 +168,19 @@ func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
// Pull the model. In the future, we may support additional operations here
// besides pulling (such as model building).
proceed, err := m.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
if err != nil {
m.log.Warnf("Failed to calculate memory required for model %q: %s", request.From, err)
// Prefer staying functional in case of unexpected estimation errors.
proceed = true
}
if !proceed {
m.log.Warnf("Runtime memory requirement for model %q exceeds total system memory", request.From)
http.Error(w, "Runtime memory requirement for model exceeds total system memory", http.StatusInsufficientStorage)
return
if !request.IgnoreRuntimeMemoryCheck {
m.log.Infof("Will estimate memory required for %q", request.From)
proceed, err := m.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
if err != nil {
m.log.Warnf("Failed to calculate memory required for model %q: %s", request.From, err)
// Prefer staying functional in case of unexpected estimation errors.
proceed = true
}
if !proceed {
m.log.Warnf("Runtime memory requirement for model %q exceeds total system memory", request.From)
http.Error(w, "Runtime memory requirement for model exceeds total system memory", http.StatusInsufficientStorage)
return
}
}
if err := m.PullModel(request.From, r, w); err != nil {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {