inference: Support disabling pre-pull memory checks
Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
parent
15e31feb30
commit
64c85dcd83
|
|
@ -14,6 +14,9 @@ import (
|
||||||
type ModelCreateRequest struct {
|
type ModelCreateRequest struct {
|
||||||
// From is the name of the model to pull.
|
// From is the name of the model to pull.
|
||||||
From string `json:"from"`
|
From string `json:"from"`
|
||||||
|
// IgnoreRuntimeMemoryCheck indicates whether the server should skip checking that it has sufficient
|
||||||
|
// memory to run the given model (assuming default configuration).
|
||||||
|
IgnoreRuntimeMemoryCheck bool `json:"ignore-runtime-memory-check,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToOpenAIList converts the model list to its OpenAI API representation. This function never
|
// ToOpenAIList converts the model list to its OpenAI API representation. This function never
|
||||||
|
|
|
||||||
|
|
@ -168,16 +168,19 @@ func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
// Pull the model. In the future, we may support additional operations here
|
// Pull the model. In the future, we may support additional operations here
|
||||||
// besides pulling (such as model building).
|
// besides pulling (such as model building).
|
||||||
proceed, err := m.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
|
if !request.IgnoreRuntimeMemoryCheck {
|
||||||
if err != nil {
|
m.log.Infof("Will estimate memory required for %q", request.From)
|
||||||
m.log.Warnf("Failed to calculate memory required for model %q: %s", request.From, err)
|
proceed, err := m.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
|
||||||
// Prefer staying functional in case of unexpected estimation errors.
|
if err != nil {
|
||||||
proceed = true
|
m.log.Warnf("Failed to calculate memory required for model %q: %s", request.From, err)
|
||||||
}
|
// Prefer staying functional in case of unexpected estimation errors.
|
||||||
if !proceed {
|
proceed = true
|
||||||
m.log.Warnf("Runtime memory requirement for model %q exceeds total system memory", request.From)
|
}
|
||||||
http.Error(w, "Runtime memory requirement for model exceeds total system memory", http.StatusInsufficientStorage)
|
if !proceed {
|
||||||
return
|
m.log.Warnf("Runtime memory requirement for model %q exceeds total system memory", request.From)
|
||||||
|
http.Error(w, "Runtime memory requirement for model exceeds total system memory", http.StatusInsufficientStorage)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err := m.PullModel(request.From, r, w); err != nil {
|
if err := m.PullModel(request.From, r, w); err != nil {
|
||||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue