Merge pull request #45 from doringeman/df

Add /engines/df
Dorin-Andrei Geman 2025-05-21 18:25:22 +03:00 committed by GitHub
commit 7ddea9dde4
9 changed files with 94 additions and 1 deletion

View File

@@ -55,7 +55,12 @@ func main() {
		modelManager,
		log.WithFields(logrus.Fields{"component": "llama.cpp"}),
		llamaServerPath,
-		func() string { wd, _ := os.Getwd(); return wd }(),
+		func() string {
+			wd, _ := os.Getwd()
+			d := filepath.Join(wd, "updated-inference")
+			_ = os.MkdirAll(d, 0o755)
+			return d
+		}(),
	)
	if err != nil {
		log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
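The storage-path argument handed to the llama.cpp backend now points at an updated-inference directory that is created eagerly, presumably the updatedServerStoragePath that the backend's GetDiskUsage measures further down, so the disk-usage query always has an existing path to walk. As a standalone sketch (hypothetical helper name, not code from this PR), the closure is equivalent to:

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// ensureStorageDir resolves a subdirectory under the current working directory
// and creates it best-effort, mirroring the inlined closure in main.go.
func ensureStorageDir(name string) string {
	wd, _ := os.Getwd()
	dir := filepath.Join(wd, name)
	_ = os.MkdirAll(dir, 0o755) // error deliberately ignored, like the closure above
	return dir
}

func main() {
	fmt.Println(ensureStorageDir("updated-inference"))
}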

View File

@@ -0,0 +1,24 @@
package diskusage

import (
	"io/fs"
	"path/filepath"
)

func Size(path string) (float64, error) {
	var size int64
	err := filepath.WalkDir(path, func(_ string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.Type().IsRegular() {
			info, err := d.Info()
			if err != nil {
				return err
			}
			size += info.Size()
		}
		return nil
	})
	return float64(size), err
}
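The new helper walks the tree and sums only regular files, so symlinks and empty directories contribute nothing, and any walk error aborts the scan. A minimal sketch of calling it from inside this module (import path taken from the hunks below; the MiB conversion is only for display):

package main

import (
	"fmt"
	"log"

	"github.com/docker/model-runner/pkg/diskusage"
)

func main() {
	// Sum the sizes of all regular files under the current directory.
	size, err := diskusage.Size(".")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%.0f bytes (%.2f MiB)\n", size, size/(1024*1024))
}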

View File

@@ -69,4 +69,6 @@ type Backend interface {
	Run(ctx context.Context, socket, model string, mode BackendMode) error
	// Status returns a description of the backend's state.
	Status() string
	// GetDiskUsage returns the disk usage of the backend.
	GetDiskUsage() (float64, error)
}
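With this change every backend must report its own on-disk footprint. As a sketch of how a caller might aggregate those figures once more backends report real numbers (the scheduler below currently queries only the default backend); the helper name, map argument, and package placement here are illustrative, not repository code:

package scheduling

import (
	"fmt"

	"github.com/docker/model-runner/pkg/inference"
)

// totalBackendDiskUsage sums GetDiskUsage across a set of backends keyed by name.
func totalBackendDiskUsage(backends map[string]inference.Backend) (float64, error) {
	var total float64
	for name, b := range backends {
		usage, err := b.GetDiskUsage()
		if err != nil {
			return 0, fmt.Errorf("disk usage for backend %s: %w", name, err)
		}
		total += usage
	}
	return total, nil
}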

View File

@@ -12,6 +12,7 @@ import (
	"runtime"
	"strconv"

	"github.com/docker/model-runner/pkg/diskusage"
	"github.com/docker/model-runner/pkg/inference"
	"github.com/docker/model-runner/pkg/inference/models"
	"github.com/docker/model-runner/pkg/logging"
@@ -199,3 +200,11 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
func (l *llamaCpp) Status() string {
	return l.status
}

func (l *llamaCpp) GetDiskUsage() (float64, error) {
	size, err := diskusage.Size(l.updatedServerStoragePath)
	if err != nil {
		return 0, fmt.Errorf("error while getting store size: %v", err)
	}
	return size, nil
}

View File

@@ -58,3 +58,7 @@ func (m *mlx) Run(ctx context.Context, socket, model string, mode inference.Back
func (m *mlx) Status() string {
	return "not running"
}

func (m *mlx) GetDiskUsage() (float64, error) {
	return 0, nil
}

View File

@@ -58,3 +58,7 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, mode inference.Bac
func (v *vLLM) Status() string {
	return "not running"
}

func (v *vLLM) GetDiskUsage() (float64, error) {
	return 0, nil
}

View File

@@ -14,6 +14,7 @@ import (
	"github.com/docker/model-distribution/distribution"
	"github.com/docker/model-distribution/registry"
	"github.com/docker/model-distribution/types"
	"github.com/docker/model-runner/pkg/diskusage"
	"github.com/docker/model-runner/pkg/inference"
	"github.com/docker/model-runner/pkg/logging"
	"github.com/sirupsen/logrus"
@@ -399,6 +400,21 @@ func (m *Manager) handlePushModel(w http.ResponseWriter, r *http.Request, model
	}
}

// GetDiskUsage returns the disk usage of the model store.
func (m *Manager) GetDiskUsage() (float64, error, int) {
	if m.distributionClient == nil {
		return 0, errors.New("model distribution service unavailable"), http.StatusServiceUnavailable
	}
	storePath := m.distributionClient.GetStorePath()
	size, err := diskusage.Size(storePath)
	if err != nil {
		return 0, fmt.Errorf("error while getting store size: %v", err), http.StatusInternalServerError
	}
	return size, nil, http.StatusOK
}

// ServeHTTP implements net/http.Handler.ServeHTTP.
func (m *Manager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	m.router.ServeHTTP(w, r)

View File

@@ -55,3 +55,9 @@ type BackendStatus struct {
	// LastUsed represents when this (backend, model, mode) tuple was last used
	LastUsed time.Time `json:"last_used,omitempty"`
}

// DiskUsage represents the disk usage of the models and default backend.
type DiskUsage struct {
	ModelsDiskUsage         float64 `json:"models_disk_usage"`
	DefaultBackendDiskUsage float64 `json:"default_backend_disk_usage"`
}
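Sizes are byte counts carried as float64, and the struct tags fix the JSON field names, so a response body should look like the output of this small sketch (values are placeholders):

package main

import (
	"encoding/json"
	"fmt"
)

// DiskUsage mirrors the type added above.
type DiskUsage struct {
	ModelsDiskUsage         float64 `json:"models_disk_usage"`
	DefaultBackendDiskUsage float64 `json:"default_backend_disk_usage"`
}

func main() {
	out, _ := json.Marshal(DiskUsage{ModelsDiskUsage: 1536000, DefaultBackendDiskUsage: 4096})
	fmt.Println(string(out))
	// {"models_disk_usage":1536000,"default_backend_disk_usage":4096}
}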

View File

@@ -83,6 +83,7 @@ func (s *Scheduler) routeHandlers() map[string]http.HandlerFunc {
	}
	m["GET "+inference.InferencePrefix+"/status"] = s.GetBackendStatus
	m["GET "+inference.InferencePrefix+"/ps"] = s.GetRunningBackends
	m["GET "+inference.InferencePrefix+"/df"] = s.GetDiskUsage
	return m
}
@@ -266,6 +267,28 @@ func (s *Scheduler) getLoaderStatus() []BackendStatus {
	return result
}

func (s *Scheduler) GetDiskUsage(w http.ResponseWriter, _ *http.Request) {
	modelsDiskUsage, err, httpCode := s.modelManager.GetDiskUsage()
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to get models disk usage: %v", err), httpCode)
		return
	}

	// TODO: Get disk usage for each backend once the backends are implemented.
	defaultBackendDiskUsage, err := s.defaultBackend.GetDiskUsage()
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to get disk usage for %s: %v", s.defaultBackend.Name(), err), http.StatusInternalServerError)
		return
	}

	diskUsage := DiskUsage{modelsDiskUsage, defaultBackendDiskUsage}
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(diskUsage); err != nil {
		http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError)
		return
	}
}

// ServeHTTP implements net/http.Handler.ServeHTTP.
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	s.router.ServeHTTP(w, r)
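End to end, the new route answers a plain HTTP GET. The sketch below assumes the runner is reachable on localhost:12434 and that inference.InferencePrefix resolves to /engines (consistent with the PR title); both are assumptions to adjust for your setup:

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// DiskUsage mirrors the response type added in this PR.
type DiskUsage struct {
	ModelsDiskUsage         float64 `json:"models_disk_usage"`
	DefaultBackendDiskUsage float64 `json:"default_backend_disk_usage"`
}

func main() {
	// Assumed host/port and prefix; not confirmed by this diff.
	resp, err := http.Get("http://localhost:12434/engines/df")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("unexpected status: %s", resp.Status)
	}
	var du DiskUsage
	if err := json.NewDecoder(resp.Body).Decode(&du); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("models: %.0f bytes, default backend: %.0f bytes\n",
		du.ModelsDiskUsage, du.DefaultBackendDiskUsage)
}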