From ml.docker.internal to model-runner.docker.internal
commit 7878bc7c69
parent 55843c8685
@@ -1,6 +1,6 @@
 package inference
 
-// ExperimentalEndpointsPrefix is used to prefix all /ml routes on the Docker
+// ExperimentalEndpointsPrefix is used to prefix all /engines routes on the Docker
 // socket while they are still in their experimental stage. This prefix doesn't
-// apply to endpoints on ml.docker.internal.
+// apply to endpoints on model-runner.docker.internal.
 const ExperimentalEndpointsPrefix = "/exp/vDD4.40"
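Note: as an illustration of the renamed scheme, the sketch below shows how the experimental prefix composes with an /engines route. It is a hypothetical example, not code from this commit; the composition follows the comment above, which says the prefix applies on the Docker socket but not on model-runner.docker.internal.

package main

import "fmt"

// Copied from the hunk above.
const ExperimentalEndpointsPrefix = "/exp/vDD4.40"

func main() {
    // On the Docker socket, /engines routes sit behind the prefix.
    fmt.Println(ExperimentalEndpointsPrefix + "/engines/models/json")
    // Output: /exp/vDD4.40/engines/models/json
    // On model-runner.docker.internal the same route is unprefixed:
    fmt.Println("/engines/models/json")
}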
@@ -59,14 +59,14 @@ func NewManager(log logger.ComponentLogger, httpClient *http.Client) *Manager {
 	m.router.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
 		http.Error(w, "not found", http.StatusNotFound)
 	})
-	m.router.HandleFunc("POST /ml/models/create", m.handleCreateModel)
-	m.router.HandleFunc("GET /ml/models/json", m.handleGetModels)
-	m.router.HandleFunc("GET /ml/models/{namespace}/{name}/json", m.handleGetModel)
-	m.router.HandleFunc("DELETE /ml/models/{namespace}/{name}", m.handleDeleteModel)
-	m.router.HandleFunc("GET /ml/{backend}/v1/models", m.handleOpenAIGetModels)
-	m.router.HandleFunc("GET /ml/{backend}/v1/models/{namespace}/{name}", m.handleOpenAIGetModel)
-	m.router.HandleFunc("GET /ml/v1/models", m.handleOpenAIGetModels)
-	m.router.HandleFunc("GET /ml/v1/models/{namespace}/{name}", m.handleOpenAIGetModel)
+	m.router.HandleFunc("POST /engines/models/create", m.handleCreateModel)
+	m.router.HandleFunc("GET /engines/models/json", m.handleGetModels)
+	m.router.HandleFunc("GET /engines/models/{namespace}/{name}/json", m.handleGetModel)
+	m.router.HandleFunc("DELETE /engines/models/{namespace}/{name}", m.handleDeleteModel)
+	m.router.HandleFunc("GET /engines/{backend}/v1/models", m.handleOpenAIGetModels)
+	m.router.HandleFunc("GET /engines/{backend}/v1/models/{namespace}/{name}", m.handleOpenAIGetModel)
+	m.router.HandleFunc("GET /engines/v1/models", m.handleOpenAIGetModels)
+	m.router.HandleFunc("GET /engines/v1/models/{namespace}/{name}", m.handleOpenAIGetModel)
 
 	// Populate the pull concurrency semaphore.
 	for i := 0; i < maximumConcurrentModelPulls; i++ {
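For the management routes registered above, here is a minimal client sketch, assuming the manager's router is served over the Docker socket behind the experimental prefix; the socket path (/var/run/docker.sock) and the exact mounting of the prefix are assumptions, not shown in this diff.

package main

import (
    "context"
    "fmt"
    "io"
    "net"
    "net/http"
)

func main() {
    // Dial the Docker socket instead of a TCP host; the URL host below is
    // a placeholder that the custom dialer ignores.
    client := &http.Client{
        Transport: &http.Transport{
            DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
                return (&net.Dialer{}).DialContext(ctx, "unix", "/var/run/docker.sock")
            },
        },
    }
    // GET /engines/models/json lists models (handleGetModels above).
    resp, err := client.Get("http://localhost/exp/vDD4.40/engines/models/json")
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(body))
}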
@@ -77,7 +77,7 @@ func NewManager(log logger.ComponentLogger, httpClient *http.Client) *Manager {
 	return m
 }
 
-// handleCreateModel handles POST /ml/models/create requests.
+// handleCreateModel handles POST /engines/models/create requests.
 func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
 	if m.distributionClient == nil {
 		http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -99,7 +99,7 @@ func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-// handleGetModels handles GET /ml/models/json requests.
+// handleGetModels handles GET /engines/models/json requests.
 func (m *Manager) handleGetModels(w http.ResponseWriter, r *http.Request) {
 	if m.distributionClient == nil {
 		http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -120,7 +120,7 @@ func (m *Manager) handleGetModels(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-// handleGetModel handles GET /ml/models/{namespace}/{name}/json requests.
+// handleGetModel handles GET /engines/models/{namespace}/{name}/json requests.
 func (m *Manager) handleGetModel(w http.ResponseWriter, r *http.Request) {
 	if m.distributionClient == nil {
 		http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -145,7 +145,7 @@ func (m *Manager) handleGetModel(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-// handleDeleteModel handles DELETE /ml/models/{namespace}/{name} requests.
+// handleDeleteModel handles DELETE /engines/models/{namespace}/{name} requests.
 func (m *Manager) handleDeleteModel(w http.ResponseWriter, r *http.Request) {
 	if m.distributionClient == nil {
 		http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -169,8 +169,8 @@ func (m *Manager) handleDeleteModel(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-// handleOpenAIGetModels handles GET /ml/{backend}/v1/models and
-// GET /ml/v1/models requests.
+// handleOpenAIGetModels handles GET /engines/{backend}/v1/models and
+// GET /engines/v1/models requests.
 func (m *Manager) handleOpenAIGetModels(w http.ResponseWriter, r *http.Request) {
 	if m.distributionClient == nil {
 		http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
@@ -191,8 +191,8 @@ func (m *Manager) handleOpenAIGetModels(w http.ResponseWriter, r *http.Request)
 	}
 }
 
-// handleOpenAIGetModel handles GET /ml/{backend}/v1/models/{namespace}/{name}
-// and GET /ml/v1/models/{namespace}/{name} requests.
+// handleOpenAIGetModel handles GET /engines/{backend}/v1/models/{namespace}/{name}
+// and GET /engines/v1/models/{namespace}/{name} requests.
 func (m *Manager) handleOpenAIGetModel(w http.ResponseWriter, r *http.Request) {
 	if m.distributionClient == nil {
 		http.Error(w, "model distribution service unavailable", http.StatusServiceUnavailable)
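The handlers renamed above also back OpenAI-compatible read routes. A hedged container-side sketch follows; that model-runner.docker.internal serves /engines/v1/models without the experimental prefix is inferred from the package comment in the first hunk, not shown in this diff.

package main

import (
    "fmt"
    "io"
    "net/http"
)

func main() {
    // GET /engines/v1/models (handleOpenAIGetModels above), addressed via
    // the host that this commit renames the endpoints to.
    resp, err := http.Get("http://model-runner.docker.internal/engines/v1/models")
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(string(body))
}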
@@ -58,12 +58,12 @@ func NewScheduler(
 	s.router.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) {
 		http.Error(w, "not found", http.StatusNotFound)
 	})
-	s.router.HandleFunc("POST /ml/{backend}/v1/chat/completions", s.handleOpenAIInference)
-	s.router.HandleFunc("POST /ml/{backend}/v1/completions", s.handleOpenAIInference)
-	s.router.HandleFunc("POST /ml/{backend}/v1/embeddings", s.handleOpenAIInference)
-	s.router.HandleFunc("POST /ml/v1/chat/completions", s.handleOpenAIInference)
-	s.router.HandleFunc("POST /ml/v1/completions", s.handleOpenAIInference)
-	s.router.HandleFunc("POST /ml/v1/embeddings", s.handleOpenAIInference)
+	s.router.HandleFunc("POST /engines/{backend}/v1/chat/completions", s.handleOpenAIInference)
+	s.router.HandleFunc("POST /engines/{backend}/v1/completions", s.handleOpenAIInference)
+	s.router.HandleFunc("POST /engines/{backend}/v1/embeddings", s.handleOpenAIInference)
+	s.router.HandleFunc("POST /engines/v1/chat/completions", s.handleOpenAIInference)
+	s.router.HandleFunc("POST /engines/v1/completions", s.handleOpenAIInference)
+	s.router.HandleFunc("POST /engines/v1/embeddings", s.handleOpenAIInference)
 
 	// Scheduler successfully initialized.
 	return s
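And a matching sketch for the inference routes registered above: a chat-completion request in the OpenAI wire format. The model name is a placeholder, and reaching the scheduler via model-runner.docker.internal from a container is an assumption for illustration.

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    // OpenAI-style request body; "ai/example-model" is hypothetical.
    payload := []byte(`{"model": "ai/example-model", "messages": [{"role": "user", "content": "Hello!"}]}`)
    resp, err := http.Post(
        "http://model-runner.docker.internal/engines/v1/chat/completions",
        "application/json",
        bytes.NewReader(payload),
    )
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(string(body))
}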
@@ -93,9 +93,9 @@ func (s *Scheduler) Run(ctx context.Context) error {
 
 // handleOpenAIInference handles scheduling and responding to OpenAI inference
 // requests, including:
-// - POST /ml/{backend}/v1/chat/completions
-// - POST /ml/{backend}/v1/completions
-// - POST /ml/{backend}/v1/embeddings
+// - POST /engines/{backend}/v1/chat/completions
+// - POST /engines/{backend}/v1/completions
+// - POST /engines/{backend}/v1/embeddings
 func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request) {
 	// Determine the requested backend and ensure that it's valid.
 	var backend inference.Backend
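The {backend} segment in the routes above is a Go 1.22 net/http path wildcard. The sketch below shows how such a handler can read it; the handler body is illustrative, not the scheduler's actual logic, and the example backend name is an assumption.

package main

import (
    "fmt"
    "log"
    "net/http"
)

func main() {
    mux := http.NewServeMux()
    // Same pattern style as the scheduler's registrations above.
    mux.HandleFunc("POST /engines/{backend}/v1/chat/completions",
        func(w http.ResponseWriter, r *http.Request) {
            backend := r.PathValue("backend") // e.g. "llama.cpp"
            fmt.Fprintf(w, "would schedule on backend %q\n", backend)
        })
    log.Fatal(http.ListenAndServe("localhost:8080", mux))
}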