package main import ( "context" "net" "net/http" "os" "os/signal" "path/filepath" "strings" "syscall" "github.com/docker/model-runner/pkg/inference" "github.com/docker/model-runner/pkg/inference/backends/llamacpp" "github.com/docker/model-runner/pkg/inference/config" "github.com/docker/model-runner/pkg/inference/models" "github.com/docker/model-runner/pkg/inference/scheduling" "github.com/docker/model-runner/pkg/routing" "github.com/sirupsen/logrus" ) var log = logrus.New() func main() { ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) defer cancel() sockName := os.Getenv("MODEL_RUNNER_SOCK") if sockName == "" { sockName = "model-runner.sock" } userHomeDir, err := os.UserHomeDir() if err != nil { log.Fatalf("Failed to get user home directory: %v", err) } modelPath := os.Getenv("MODELS_PATH") if modelPath == "" { modelPath = filepath.Join(userHomeDir, ".docker", "models") } modelManager := models.NewManager(log, models.ClientConfig{ StoreRootPath: modelPath, Logger: log.WithFields(logrus.Fields{"component": "model-manager"}), }) llamaServerPath := os.Getenv("LLAMA_SERVER_PATH") if llamaServerPath == "" { llamaServerPath = "/Applications/Docker.app/Contents/Resources/bin" } log.Infof("LLAMA_SERVER_PATH: %s", llamaServerPath) // Create llama.cpp configuration from environment variables llamaCppConfig := createLlamaCppConfigFromEnv() llamaCppBackend, err := llamacpp.New( log, modelManager, log.WithFields(logrus.Fields{"component": "llama.cpp"}), llamaServerPath, func() string { wd, _ := os.Getwd() d := filepath.Join(wd, "updated-inference") _ = os.MkdirAll(d, 0o755) return d }(), llamaCppConfig, ) if err != nil { log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err) } scheduler := scheduling.NewScheduler( log, map[string]inference.Backend{llamacpp.Name: llamaCppBackend}, llamaCppBackend, modelManager, http.DefaultClient, ) router := routing.NewNormalizedServeMux() for _, route := range modelManager.GetRoutes() { router.Handle(route, modelManager) } for _, route := range scheduler.GetRoutes() { router.Handle(route, scheduler) } server := &http.Server{Handler: router} serverErrors := make(chan error, 1) // Check if we should use TCP port instead of Unix socket tcpPort := os.Getenv("MODEL_RUNNER_PORT") if tcpPort != "" { // Use TCP port addr := ":" + tcpPort log.Infof("Listening on TCP port %s", tcpPort) server.Addr = addr go func() { serverErrors <- server.ListenAndServe() }() } else { // Use Unix socket if err := os.Remove(sockName); err != nil { if !os.IsNotExist(err) { log.Fatalf("Failed to remove existing socket: %v", err) } } ln, err := net.ListenUnix("unix", &net.UnixAddr{Name: sockName, Net: "unix"}) if err != nil { log.Fatalf("Failed to listen on socket: %v", err) } go func() { serverErrors <- server.Serve(ln) }() } schedulerErrors := make(chan error, 1) go func() { schedulerErrors <- scheduler.Run(ctx) }() select { case err := <-serverErrors: if err != nil { log.Errorf("Server error: %v", err) } case <-ctx.Done(): log.Infoln("Shutdown signal received") log.Infoln("Waiting for the scheduler to stop") if err := <-schedulerErrors; err != nil { log.Errorf("Scheduler error: %v", err) } log.Infoln("Shutting down the server") if err := server.Shutdown(ctx); err != nil { log.Errorf("Server shutdown error: %v", err) } } log.Infoln("Docker Model Runner stopped") } // createLlamaCppConfigFromEnv creates a LlamaCppConfig from environment variables func createLlamaCppConfigFromEnv() config.BackendConfig { // Check if any configuration environment variables are set argsStr := os.Getenv("LLAMA_ARGS") // If no environment variables are set, use default configuration if argsStr == "" { return nil // nil will cause the backend to use its default configuration } // Split the string by spaces, respecting quoted arguments args := splitArgs(argsStr) // Check for disallowed arguments disallowedArgs := []string{"--model", "--host", "--embeddings", "--mmproj"} for _, arg := range args { for _, disallowed := range disallowedArgs { if arg == disallowed { log.Fatalf("LLAMA_ARGS cannot override the %s argument as it is controlled by the model runner", disallowed) } } } log.Infof("Using custom arguments: %v", args) return &llamacpp.Config{ Args: args, } } // splitArgs splits a string into arguments, respecting quoted arguments func splitArgs(s string) []string { var args []string var currentArg strings.Builder inQuotes := false for _, r := range s { switch { case r == '"' || r == '\'': inQuotes = !inQuotes case r == ' ' && !inQuotes: if currentArg.Len() > 0 { args = append(args, currentArg.String()) currentArg.Reset() } default: currentArg.WriteRune(r) } } if currentArg.Len() > 0 { args = append(args, currentArg.String()) } return args }