Add /engines/ps

Signed-off-by: Dorin Geman <dorin.geman@docker.com>
This commit is contained in:
Dorin Geman 2025-05-20 16:00:27 +03:00
parent e6fd394300
commit 13c093ca1e
No known key found for this signature in database
2 changed files with 55 additions and 0 deletions

View File

@ -2,6 +2,7 @@ package scheduling
import (
"strings"
"time"
"github.com/docker/model-runner/pkg/inference"
)
@ -42,3 +43,15 @@ type OpenAIInferenceRequest struct {
// Model is the requested model name.
Model string `json:"model"`
}
// BackendStatus describes one running backend instance as reported by the
// scheduler: which backend it is, which model it has loaded, and in which mode
// it is operating.
type BackendStatus struct {
	// BackendName is the name of the backend.
	BackendName string `json:"backend_name"`
	// ModelName is the name of the model loaded in the backend.
	ModelName string `json:"model_name"`
	// Mode is the mode the backend is operating in.
	Mode string `json:"mode"`
	// LastUsed records when this (backend, model, mode) tuple was last used.
	// The zero value means "no timestamp available" and is left out of the
	// JSON encoding.
	//
	// omitzero is used instead of omitempty because omitempty never omits a
	// struct value such as time.Time, so a zero timestamp would otherwise be
	// emitted as "0001-01-01T00:00:00Z". omitzero is honoured by encoding/json
	// from Go 1.24 onwards; older toolchains ignore the option and keep
	// emitting the field.
	LastUsed time.Time `json:"last_used,omitzero"`
}

View File

@ -8,6 +8,7 @@ import (
"fmt"
"io"
"net/http"
"time"
"github.com/docker/model-distribution/distribution"
"github.com/docker/model-runner/pkg/inference"
@ -81,6 +82,7 @@ func (s *Scheduler) routeHandlers() map[string]http.HandlerFunc {
m[route] = s.handleOpenAIInference
}
m["GET "+inference.InferencePrefix+"/status"] = s.GetBackendStatus
m["GET "+inference.InferencePrefix+"/ps"] = s.GetRunningBackends
return m
}
@ -224,6 +226,46 @@ func (s *Scheduler) ResetInstaller(httpClient *http.Client) {
s.installer = newInstaller(s.log, s.backends, httpClient)
}
// GetRunningBackends is the HTTP handler that reports all running backends.
// It responds with a JSON array of BackendStatus values obtained from
// getLoaderStatus. If the status list cannot be encoded, it responds with
// 500 Internal Server Error and a plain-text message instead.
func (s *Scheduler) GetRunningBackends(w http.ResponseWriter, r *http.Request) {
	// Encode before touching the response. This keeps encoding failures
	// (reported as a 500) separate from transport failures, which happen
	// after the response has started and can no longer be turned into an
	// error status.
	payload, err := json.Marshal(s.getLoaderStatus())
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	// The trailing newline keeps the body identical to what
	// json.Encoder.Encode would have produced.
	//
	// A write error means the client connection is no longer usable; the
	// status line has already been sent, so there is nothing to report back.
	_, _ = w.Write(append(payload, '\n'))
}
// getLoaderStatus builds a snapshot of the runners tracked by the loader.
//
// It acquires the loader lock for the duration of the scan and returns an
// empty, non-nil slice if the lock cannot be acquired. Runner entries whose
// slot is nil are skipped. LastUsed is copied from the slot's timestamp only
// when the slot's reference count is zero; otherwise it stays at the zero
// time (presumably because the runner is currently in use, to be confirmed
// against the loader implementation).
func (s *Scheduler) getLoaderStatus() []BackendStatus {
	if acquired := s.loader.lock(context.Background()); !acquired {
		return []BackendStatus{}
	}
	defer s.loader.unlock()

	statuses := make([]BackendStatus, 0, len(s.loader.runners))
	for runnerKey, idx := range s.loader.runners {
		// Skip entries whose slot holds no runner.
		if s.loader.slots[idx] == nil {
			continue
		}

		// Zero time unless the slot has no outstanding references.
		var lastUsed time.Time
		if s.loader.references[idx] == 0 {
			lastUsed = s.loader.timestamps[idx]
		}

		statuses = append(statuses, BackendStatus{
			BackendName: runnerKey.backend,
			ModelName:   runnerKey.model,
			Mode:        runnerKey.mode.String(),
			LastUsed:    lastUsed,
		})
	}
	return statuses
}
// ServeHTTP implements net/http.Handler.ServeHTTP.
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
s.router.ServeHTTP(w, r)