From ml.docker.internal to model-runner.docker.internal

This commit is contained in:
Ignasi 2025-03-10 16:50:51 +01:00 committed by Jacob Howard
parent 55843c8685
commit 7878bc7c69
No known key found for this signature in database
GPG Key ID: 3E8B8F7FEB46FC66
3 changed files with 27 additions and 27 deletions

View File

@@ -1,6 +1,6 @@
 package inference
-// ExperimentalEndpointsPrefix is used to prefix all /ml routes on the Docker
+// ExperimentalEndpointsPrefix is used to prefix all /engines routes on the Docker
 // socket while they are still in their experimental stage. This prefix doesn't
-// apply to endpoints on ml.docker.internal.
+// apply to endpoints on model-runner.docker.internal.
 const ExperimentalEndpointsPrefix = "/exp/vDD4.40"

View File

@@ -59,14 +59,14 @@ func NewManager(log logger.ComponentLogger, httpClient *http.Client) *Manager {
 m.router.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
 http.Error(w, "not found", http.StatusNotFound)
 })
-m.router.HandleFunc("POST /ml/models/create", m.handleCreateModel)
-m.router.HandleFunc("GET /ml/models/json", m.handleGetModels)
-m.router.HandleFunc("GET /ml/models/{namespace}/{name}/json", m.handleGetModel)
-m.router.HandleFunc("DELETE /ml/models/{namespace}/{name}", m.handleDeleteModel)
-m.router.HandleFunc("GET /ml/{backend}/v1/models", m.handleOpenAIGetModels)
-m.router.HandleFunc("GET /ml/{backend}/v1/models/{namespace}/{name}", m.handleOpenAIGetModel)
-m.router.HandleFunc("GET /ml/v1/models", m.handleOpenAIGetModels)
-m.router.HandleFunc("GET /ml/v1/models/{namespace}/{name}", m.handleOpenAIGetModel)
+m.router.HandleFunc("POST /engines/models/create", m.handleCreateModel)
+m.router.HandleFunc("GET /engines/models/json", m.handleGetModels)
+m.router.HandleFunc("GET /engines/models/{namespace}/{name}/json", m.handleGetModel)
+m.router.HandleFunc("DELETE /engines/models/{namespace}/{name}", m.handleDeleteModel)
+m.router.HandleFunc("GET /engines/{backend}/v1/models", m.handleOpenAIGetModels)
+m.router.HandleFunc("GET /engines/{backend}/v1/models/{namespace}/{name}", m.handleOpenAIGetModel)
+m.router.HandleFunc("GET /engines/v1/models", m.handleOpenAIGetModels)
+m.router.HandleFunc("GET /engines/v1/models/{namespace}/{name}", m.handleOpenAIGetModel)
 // Populate the pull concurrency semaphore.
 for i := 0; i < maximumConcurrentModelPulls; i++ {
@@ -77,7 +77,7 @@ func NewManager(log logger.ComponentLogger, httpClient *http.Client) *Manager {
 return m
 }
-// handleCreateModel handles POST /ml/models/create requests.
+// handleCreateModel handles POST /engines/models/create requests.
 func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
 if m.distributionClient == nil {
 http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -99,7 +99,7 @@ func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
 }
 }
-// handleGetModels handles GET /ml/models/json requests.
+// handleGetModels handles GET /engines/models/json requests.
 func (m *Manager) handleGetModels(w http.ResponseWriter, r *http.Request) {
 if m.distributionClient == nil {
 http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -120,7 +120,7 @@ func (m *Manager) handleGetModels(w http.ResponseWriter, r *http.Request) {
 }
 }
-// handleGetModel handles GET /ml/models/{namespace}/{name}/json requests.
+// handleGetModel handles GET /engines/models/{namespace}/{name}/json requests.
 func (m *Manager) handleGetModel(w http.ResponseWriter, r *http.Request) {
 if m.distributionClient == nil {
 http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -145,7 +145,7 @@ func (m *Manager) handleGetModel(w http.ResponseWriter, r *http.Request) {
 }
 }
-// handleDeleteModel handles DELETE /ml/models/{namespace}/{name} requests.
+// handleDeleteModel handles DELETE /engines/models/{namespace}/{name} requests.
 func (m *Manager) handleDeleteModel(w http.ResponseWriter, r *http.Request) {
 if m.distributionClient == nil {
 http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -169,8 +169,8 @@ func (m *Manager) handleDeleteModel(w http.ResponseWriter, r *http.Request) {
 }
 }
-// handleOpenAIGetModels handles GET /ml/{backend}/v1/models and
-// GET /ml/v1/models requests.
+// handleOpenAIGetModels handles GET /engines/{backend}/v1/models and
+// GET /engines/v1/models requests.
 func (m *Manager) handleOpenAIGetModels(w http.ResponseWriter, r *http.Request) {
 if m.distributionClient == nil {
 http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -191,8 +191,8 @@ func (m *Manager) handleOpenAIGetModels(w http.ResponseWriter, r *http.Request)
 }
 }
-// handleOpenAIGetModel handles GET /ml/{backend}/v1/models/{namespace}/{name}
-// and GET /ml/v1/models/{namespace}/{name} requests.
+// handleOpenAIGetModel handles GET /engines/{backend}/v1/models/{namespace}/{name}
+// and GET /engines/v1/models/{namespace}/{name} requests.
 func (m *Manager) handleOpenAIGetModel(w http.ResponseWriter, r *http.Request) {
 if m.distributionClient == nil {
 http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)

View File

@@ -58,12 +58,12 @@ func NewScheduler(
 s.router.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
 http.Error(w, "not found", http.StatusNotFound)
 })
-s.router.HandleFunc("POST /ml/{backend}/v1/chat/completions", s.handleOpenAIInference)
-s.router.HandleFunc("POST /ml/{backend}/v1/completions", s.handleOpenAIInference)
-s.router.HandleFunc("POST /ml/{backend}/v1/embeddings", s.handleOpenAIInference)
-s.router.HandleFunc("POST /ml/v1/chat/completions", s.handleOpenAIInference)
-s.router.HandleFunc("POST /ml/v1/completions", s.handleOpenAIInference)
-s.router.HandleFunc("POST /ml/v1/embeddings", s.handleOpenAIInference)
+s.router.HandleFunc("POST /engines/{backend}/v1/chat/completions", s.handleOpenAIInference)
+s.router.HandleFunc("POST /engines/{backend}/v1/completions", s.handleOpenAIInference)
+s.router.HandleFunc("POST /engines/{backend}/v1/embeddings", s.handleOpenAIInference)
+s.router.HandleFunc("POST /engines/v1/chat/completions", s.handleOpenAIInference)
+s.router.HandleFunc("POST /engines/v1/completions", s.handleOpenAIInference)
+s.router.HandleFunc("POST /engines/v1/embeddings", s.handleOpenAIInference)
 // Scheduler successfully initialized.
 return s
@@ -93,9 +93,9 @@ func (s *Scheduler) Run(ctx context.Context) error {
 // handleOpenAIInference handles scheduling and responding to OpenAI inference
 // requests, including:
-// - POST /ml/{backend}/v1/chat/completions
-// - POST /ml/{backend}/v1/completions
-// - POST /ml/{backend}/v1/embeddings
+// - POST /engines/{backend}/v1/chat/completions
+// - POST /engines/{backend}/v1/completions
+// - POST /engines/{backend}/v1/embeddings
 func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request) {
 // Determine the requested backend and ensure that it's valid.
 var backend inference.Backend