commit
7ddea9dde4
7
main.go
7
main.go
|
@ -55,7 +55,12 @@ func main() {
|
|||
modelManager,
|
||||
log.WithFields(logrus.Fields{"component": "llama.cpp"}),
|
||||
llamaServerPath,
|
||||
func() string { wd, _ := os.Getwd(); return wd }(),
|
||||
func() string {
|
||||
wd, _ := os.Getwd()
|
||||
d := filepath.Join(wd, "updated-inference")
|
||||
_ = os.MkdirAll(d, 0o755)
|
||||
return d
|
||||
}(),
|
||||
)
|
||||
if err != nil {
|
||||
log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
package diskusage
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// Size walks the file tree rooted at path and returns the combined size, in
// bytes, of every regular file found. Directories, symlinks and other
// non-regular entries do not contribute to the total.
//
// The walk stops at the first error reported by the traversal or by a
// DirEntry.Info call. That error is returned together with the total
// accumulated up to that point.
func Size(path string) (float64, error) {
	var total int64

	visit := func(_ string, entry fs.DirEntry, walkErr error) error {
		// Propagate traversal failures (e.g. unreadable directory) unchanged.
		if walkErr != nil {
			return walkErr
		}
		// Only regular files carry a meaningful byte count.
		if !entry.Type().IsRegular() {
			return nil
		}
		info, infoErr := entry.Info()
		if infoErr != nil {
			return infoErr
		}
		total += info.Size()
		return nil
	}

	walkResult := filepath.WalkDir(path, visit)
	return float64(total), walkResult
}
|
|
@ -69,4 +69,6 @@ type Backend interface {
|
|||
Run(ctx context.Context, socket, model string, mode BackendMode) error
|
||||
// Status returns a description of the backend's state.
|
||||
Status() string
|
||||
// GetDiskUsage returns the disk usage of the backend.
|
||||
GetDiskUsage() (float64, error)
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"runtime"
|
||||
"strconv"
|
||||
|
||||
"github.com/docker/model-runner/pkg/diskusage"
|
||||
"github.com/docker/model-runner/pkg/inference"
|
||||
"github.com/docker/model-runner/pkg/inference/models"
|
||||
"github.com/docker/model-runner/pkg/logging"
|
||||
|
@ -199,3 +200,11 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
|
|||
func (l *llamaCpp) Status() string {
|
||||
return l.status
|
||||
}
|
||||
|
||||
func (l *llamaCpp) GetDiskUsage() (float64, error) {
|
||||
size, err := diskusage.Size(l.updatedServerStoragePath)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error while getting store size: %v", err)
|
||||
}
|
||||
return size, nil
|
||||
}
|
||||
|
|
|
@ -58,3 +58,7 @@ func (m *mlx) Run(ctx context.Context, socket, model string, mode inference.Back
|
|||
func (m *mlx) Status() string {
|
||||
return "not running"
|
||||
}
|
||||
|
||||
func (m *mlx) GetDiskUsage() (float64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
|
|
@ -58,3 +58,7 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, mode inference.Bac
|
|||
func (v *vLLM) Status() string {
|
||||
return "not running"
|
||||
}
|
||||
|
||||
func (v *vLLM) GetDiskUsage() (float64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
"github.com/docker/model-distribution/distribution"
|
||||
"github.com/docker/model-distribution/registry"
|
||||
"github.com/docker/model-distribution/types"
|
||||
"github.com/docker/model-runner/pkg/diskusage"
|
||||
"github.com/docker/model-runner/pkg/inference"
|
||||
"github.com/docker/model-runner/pkg/logging"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
@ -399,6 +400,21 @@ func (m *Manager) handlePushModel(w http.ResponseWriter, r *http.Request, model
|
|||
}
|
||||
}
|
||||
|
||||
// GetDiskUsage returns the disk usage of the model store.
|
||||
func (m *Manager) GetDiskUsage() (float64, error, int) {
|
||||
if m.distributionClient == nil {
|
||||
return 0, errors.New("model distribution service unavailable"), http.StatusServiceUnavailable
|
||||
}
|
||||
|
||||
storePath := m.distributionClient.GetStorePath()
|
||||
size, err := diskusage.Size(storePath)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error while getting store size: %v", err), http.StatusInternalServerError
|
||||
}
|
||||
|
||||
return size, nil, http.StatusOK
|
||||
}
|
||||
|
||||
// ServeHTTP implement net/http.Handler.ServeHTTP.
|
||||
func (m *Manager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
m.router.ServeHTTP(w, r)
|
||||
|
|
|
@ -55,3 +55,9 @@ type BackendStatus struct {
|
|||
// LastUsed represents when this (backend, model, mode) tuple was last used
|
||||
LastUsed time.Time `json:"last_used,omitempty"`
|
||||
}
|
||||
|
||||
// DiskUsage represents the disk usage of the models and default backend.
// It is serialized as a JSON object with one numeric field per component.
type DiskUsage struct {
	// ModelsDiskUsage is the disk usage reported for the model store.
	ModelsDiskUsage float64 `json:"models_disk_usage"`
	// DefaultBackendDiskUsage is the disk usage reported by the default
	// inference backend.
	DefaultBackendDiskUsage float64 `json:"default_backend_disk_usage"`
}
|
||||
|
|
|
@ -83,6 +83,7 @@ func (s *Scheduler) routeHandlers() map[string]http.HandlerFunc {
|
|||
}
|
||||
m["GET "+inference.InferencePrefix+"/status"] = s.GetBackendStatus
|
||||
m["GET "+inference.InferencePrefix+"/ps"] = s.GetRunningBackends
|
||||
m["GET "+inference.InferencePrefix+"/df"] = s.GetDiskUsage
|
||||
return m
|
||||
}
|
||||
|
||||
|
@ -266,6 +267,28 @@ func (s *Scheduler) getLoaderStatus() []BackendStatus {
|
|||
return result
|
||||
}
|
||||
|
||||
func (s *Scheduler) GetDiskUsage(w http.ResponseWriter, _ *http.Request) {
|
||||
modelsDiskUsage, err, httpCode := s.modelManager.GetDiskUsage()
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("Failed to get models disk usage: %v", err), httpCode)
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: Get disk usage for each backend once the backends are implemented.
|
||||
defaultBackendDiskUsage, err := s.defaultBackend.GetDiskUsage()
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("Failed to get disk usage for %s: %v", s.defaultBackend.Name(), err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
diskUsage := DiskUsage{modelsDiskUsage, defaultBackendDiskUsage}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(w).Encode(diskUsage); err != nil {
|
||||
http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// ServeHTTP implements net/http.Handler.ServeHTTP.
|
||||
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
s.router.ServeHTTP(w, r)
|
||||
|
|
Loading…
Reference in New Issue