Dockerize (#22)

* Adds Makefile for local development

* Fix chat completions example request

* Added delete example

* Dockerize model-runner

* WIP Run container with host access to socket

* Debugging

* Run in Docker container with TCP port access

* Mounted model storage

* - Remove duplication in .gitignore
- Do not use alpine in the builder image
- NVIDIA uses Ubuntu throughout its CDI docs and publishes Ubuntu tags for nvidia/cuda, but not Debian ones, so use Ubuntu for our final image
For more details: https://github.com/docker/model-runner/pull/22

* - Add MODELS_PATH environment variable to configure model storage location
- Default to $HOME/.docker/models when MODELS_PATH is not set
- Update Docker container to use /models as the default storage path
- Update Makefile to pass MODELS_PATH to container
- Update Dockerfile to create and set permissions for /models directory

This change allows users to:
- Override the model storage location via MODELS_PATH (see the resolution sketch below)
- Keep backward compatibility with the default $HOME/.docker/models path
- Use the more idiomatic /models location inside the container
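A minimal, self-contained sketch of the resolution logic described above; the actual change to main.go further down in this diff is the authoritative version:

```go
// Sketch: resolve the model storage root, preferring MODELS_PATH and
// falling back to $HOME/.docker/models.
package main

import (
	"log"
	"os"
	"path/filepath"
)

func modelStorePath() (string, error) {
	// MODELS_PATH wins when set (e.g. /models inside the container).
	if p := os.Getenv("MODELS_PATH"); p != "" {
		return p, nil
	}
	// Otherwise fall back to the historical default under the user's home.
	home, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}
	return filepath.Join(home, ".docker", "models"), nil
}

func main() {
	p, err := modelStorePath()
	if err != nil {
		log.Fatalf("resolving model store path: %v", err)
	}
	log.Printf("model store: %s", p)
}
```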

* Removes unneeded logs
Commit dbbb7afe9f (parent 4239791795), authored by Ignasi on 2025-04-29 18:03:12 +02:00, committed via GitHub.
10 changed files with 284 additions and 68 deletions

.dockerignore (new file, 76 lines)
@@ -0,0 +1,76 @@
# Version control
.git/
.gitignore
# IDE and editor files
.idea/
.vscode/
*.swp
*.swo
# Go build and cache artifacts
/vendor/
*.test
*.out
*.exe
*.dll
*.so
*.dylib
*.a
*.o
*.obj
*.cgo*
*.coverprofile
*.prof
*.tmp
*.log
# Go tool cache
/go-build/
/go-cache/
# Test and development artifacts
test/
tests/
*_test.go
# Build outputs
dist/
build/
out/
debug/
# Temporary and local files
tmp/
temp/
.local/
local/
# Environment and secrets
.env*
*.env
*.pem
*.key
*.crt
config.local.*
*.local.yml
# Documentation and markdown
docs/
*.md
README*
LICENSE
# Docker files
Dockerfile*
# Miscellaneous
*.bak
*.old
*.orig
*.rej
*.DS_Store
.Spotlight-V100
.Trashes
# Exclude nothing else; keep source code and necessary files for build

Dockerfile (new file, 63 lines)
@@ -0,0 +1,63 @@
# syntax=docker/dockerfile:1
ARG GO_VERSION=1.24.2
ARG LLAMA_SERVER_VERSION=latest
ARG LLAMA_BINARY_PATH=/com.docker.llama-server.native.linux.cpu.amd64
FROM golang:${GO_VERSION}-bookworm AS builder
# Install git for go mod download if needed
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy go mod/sum first for better caching
COPY --link go.mod go.sum ./
# Download dependencies (with cache mounts)
RUN --mount=type=cache,target=/go/pkg/mod \
--mount=type=cache,target=/root/.cache/go-build \
go mod download
# Copy the rest of the source code
COPY --link . .
# Build the Go binary (static build)
RUN --mount=type=cache,target=/go/pkg/mod \
--mount=type=cache,target=/root/.cache/go-build \
CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
# --- Get llama.cpp binary ---
FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION} AS llama-server
# --- Final image ---
FROM ubuntu:24.04 AS final
# Create non-root user
RUN groupadd --system modelrunner && useradd --system --gid modelrunner modelrunner
# Install ca-certificates for HTTPS
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Create directories for the socket file and llama.cpp binary, and set proper permissions
RUN mkdir -p /var/run/model-runner /app/bin /models && \
chown -R modelrunner:modelrunner /var/run/model-runner /app/bin /models && \
chmod -R 755 /models
# Copy the built binary from builder
COPY --from=builder /app/model-runner /app/model-runner
# Copy the llama.cpp binary from the llama-server stage
ARG LLAMA_BINARY_PATH
COPY --from=llama-server ${LLAMA_BINARY_PATH} /app/bin/com.docker.llama-server
USER modelrunner
# Set the environment variable for the socket path and LLaMA server binary path
ENV MODEL_RUNNER_SOCK=/var/run/model-runner/model-runner.sock
ENV LLAMA_SERVER_PATH=/app/bin
ENV HOME=/home/modelrunner
ENTRYPOINT ["/app/model-runner"]

Makefile
@@ -1,9 +1,17 @@
# Project variables
APP_NAME := model-runner
GO_VERSION := 1.23.7
LLAMA_SERVER_VERSION := v0.0.4-rc2-cpu
TARGET_OS := linux
TARGET_ARCH := amd64
ACCEL := cpu
DOCKER_IMAGE := go-model-runner:latest
LLAMA_BINARY := /com.docker.llama-server.native.$(TARGET_OS).$(ACCEL).$(TARGET_ARCH)
PORT := 8080
MODELS_PATH := $(shell pwd)/models
# Main targets
.PHONY: build run clean help
.PHONY: build run clean test docker-build docker-run help
# Default target
.DEFAULT_GOAL := help
@@ -20,11 +28,42 @@ run: build
clean:
rm -f $(APP_NAME)
rm -f model-runner.sock
rm -rf $(MODELS_PATH)
# Run tests
test:
go test -v ./...
# Build Docker image
docker-build:
docker build --platform linux/amd64 \
--build-arg LLAMA_SERVER_VERSION=$(LLAMA_SERVER_VERSION) \
--build-arg LLAMA_BINARY_PATH=$(LLAMA_BINARY) \
-t $(DOCKER_IMAGE) .
# Run in Docker container with TCP port access and mounted model storage
docker-run: docker-build
@echo ""
@echo "Starting service on port $(PORT) with model storage at $(MODELS_PATH)..."
@echo "Service will be available at: http://localhost:$(PORT)"
@echo "Example usage: curl http://localhost:$(PORT)/models"
@echo ""
mkdir -p $(MODELS_PATH)
docker run --rm \
-p $(PORT):$(PORT) \
-v "$(MODELS_PATH):/models" \
-e MODEL_RUNNER_PORT=$(PORT) \
-e LLAMA_SERVER_PATH=/app/bin \
-e MODELS_PATH=/models \
$(DOCKER_IMAGE)
# Show help
help:
@echo "Available targets:"
@echo " build - Build the Go application"
@echo " run - Run the application locally"
@echo " clean - Clean build artifacts"
@echo " help - Show this help message"
@echo " build - Build the Go application"
@echo " run - Run the application locally"
@echo " clean - Clean build artifacts"
@echo " test - Run tests"
@echo " docker-build - Build Docker image"
@echo " docker-run - Run in Docker container with TCP port access and mounted model storage"
@echo " help - Show this help message"

README.md
@@ -23,61 +23,80 @@ The Makefile provides the following targets:
- `build` - Build the Go application
- `run` - Run the application locally
- `clean` - Clean build artifacts
- `test` - Run tests
- `docker-build` - Build the Docker image
- `docker-run` - Run the application in a Docker container with TCP port access and mounted model storage
- `help` - Show available targets
### Examples
### Running in Docker
The application can be run in Docker with the following features enabled by default:
- TCP port access (default port 8080)
- Persistent model storage in a local `models` directory
```sh
# Build the application
make build
# Run with default settings
make docker-run
# Run the application locally
make run
# Show all available targets
make help
# Customize port and model storage location
make docker-run PORT=3000 MODELS_PATH=/path/to/your/models
```
This will:
- Create a `models` directory in your current working directory (or use the specified path)
- Mount this directory into the container
- Start the service on port 8080 (or the specified port)
- All models downloaded will be stored in the host's `models` directory and will persist between container runs
### llama.cpp integration
The Docker image includes the llama.cpp server binary from the `docker/docker-model-backend-llamacpp` image. You can specify the version of the image to use by setting the `LLAMA_SERVER_VERSION` variable. Additionally, you can configure the target OS, architecture, and acceleration type:
```sh
# Build with a specific llama.cpp server version
LLAMA_SERVER_VERSION=v0.0.4-rc2-cpu make docker-build
# Specify all parameters
LLAMA_SERVER_VERSION=v0.0.4-rc2-cpu TARGET_OS=linux TARGET_ARCH=amd64 ACCEL=cpu make docker-build
```
Default values:
- `LLAMA_SERVER_VERSION`: v0.0.4-rc2-cpu
- `TARGET_OS`: linux
- `TARGET_ARCH`: amd64
- `ACCEL`: cpu
The binary path in the image follows this pattern: `/com.docker.llama-server.native.${TARGET_OS}.${ACCEL}.${TARGET_ARCH}` (for example, `/com.docker.llama-server.native.linux.cpu.amd64`)
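As a side note, composing that path programmatically could look like the short Go sketch below; the defaults listed above are assumed, and the helper name is illustrative only:

```go
// Sketch: compose the llama.cpp server binary path used in the image.
// Variable names mirror the Makefile; defaults match the list above.
package main

import "fmt"

func llamaBinaryPath(targetOS, accel, targetArch string) string {
	return fmt.Sprintf("/com.docker.llama-server.native.%s.%s.%s", targetOS, accel, targetArch)
}

func main() {
	// Prints: /com.docker.llama-server.native.linux.cpu.amd64
	fmt.Println(llamaBinaryPath("linux", "cpu", "amd64"))
}
```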
## API Examples
The Model Runner exposes a REST API over a Unix socket. You can interact with it using curl commands with the `--unix-socket` option.
The Model Runner exposes a REST API that can be accessed via TCP port. You can interact with it using curl commands.
### Listing Models
### Using the API
To list all available models:
When running with `docker-run`, you can use regular HTTP requests:
```sh
curl --unix-socket model-runner.sock localhost/models
```
# List all available models
curl http://localhost:8080/models
### Creating a Model
# Create a new model
curl http://localhost:8080/models/create -X POST -d '{"from": "ai/smollm2"}'
To create a new model:
# Get information about a specific model
curl http://localhost:8080/models/ai/smollm2
```sh
curl --unix-socket model-runner.sock localhost/models/create -X POST -d '{"from": "ai/smollm2"}'
```
### Getting Model Information
To get information about a specific model:
```sh
curl --unix-socket model-runner.sock localhost/models/ai/smollm2
```
### Chatting with a Model
To chat with a model, you can send a POST request to the model's chat endpoint:
```sh
curl --unix-socket model-runner.sock localhost/engines/llama.cpp/v1/chat/completions -X POST -d '{
# Chat with a model
curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
"model": "ai/smollm2",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello, how are you?"}
]
}'
# Delete a model
curl http://localhost:8080/models/ai/smollm2 -X DELETE
```
The response will contain the model's reply:
@@ -105,10 +124,3 @@ The response will contain the model's reply:
}
}
```
### Deleting a model
To delete a model from the server, send a DELETE request to the model's endpoint:
```sh
curl --unix-socket model-runner.sock localhost/models/ai/smollm2 -X DELETE
```

go.mod (2 changes)
@@ -5,7 +5,7 @@ go 1.23.7
require (
github.com/containerd/containerd/v2 v2.0.4
github.com/containerd/platforms v1.0.0-rc.1
github.com/docker/model-distribution v0.0.0-20250418222450-8f6c05a5ffa4
github.com/docker/model-distribution v0.0.0-20250423075433-587f475e591d
github.com/jaypipes/ghw v0.16.0
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec v1.1.1

go.sum (4 changes)
@@ -39,6 +39,8 @@ github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZ
github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M=
github.com/docker/model-distribution v0.0.0-20250418222450-8f6c05a5ffa4 h1:v7DyBUd08t4XQe4NC2Rb8ZSWOiWVXD0TbdgIOQZ/IQo=
github.com/docker/model-distribution v0.0.0-20250418222450-8f6c05a5ffa4/go.mod h1:fd9H/2KAY0+ByxbohWfCkxp0rxf0pqSlHEtFTjTXbLk=
github.com/docker/model-distribution v0.0.0-20250423075433-587f475e591d h1:lyxdxjNHTSyQ2w1rjbuu5pbgX42AD3kmxWLNv3mdqQ4=
github.com/docker/model-distribution v0.0.0-20250423075433-587f475e591d/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -59,6 +61,8 @@ github.com/google/go-containerregistry v0.20.3 h1:oNx7IdTI936V8CQRveCjaxOiegWwvM
github.com/google/go-containerregistry v0.20.3/go.mod h1:w00pIgBRDVUDFM6bq+Qx8lwNWK+cxgCuX1vd3PIBDNI=
github.com/gpustack/gguf-parser-go v0.14.0 h1:9kMdbJ9pHhvEeATx163WE9q+O74bi05KPJLEtVBUjw0=
github.com/gpustack/gguf-parser-go v0.14.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
github.com/gpustack/gguf-parser-go v0.14.1 h1:tmz2eTnSEFfE52V10FESqo9oAUquZ6JKQFntWC/wrEg=
github.com/gpustack/gguf-parser-go v0.14.1/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
github.com/jaypipes/ghw v0.16.0 h1:3HurCTS38VNpeQLo5fIdZsySuo/qAfpPSJ5t05QBFPM=

main.go (56 changes)
@@ -32,16 +32,28 @@ func main() {
log.Fatalf("Failed to get user home directory: %v", err)
}
modelPath := os.Getenv("MODELS_PATH")
if modelPath == "" {
modelPath = filepath.Join(userHomeDir, ".docker", "models")
}
modelManager := models.NewManager(log, models.ClientConfig{
StoreRootPath: filepath.Join(userHomeDir, ".docker", "models"),
StoreRootPath: modelPath,
Logger: log.WithFields(logrus.Fields{"component": "model-manager"}),
})
llamaServerPath := os.Getenv("LLAMA_SERVER_PATH")
if llamaServerPath == "" {
llamaServerPath = "/Applications/Docker.app/Contents/Resources/bin"
}
log.Infof("LLAMA_SERVER_PATH: %s", llamaServerPath)
llamaCppBackend, err := llamacpp.New(
log,
modelManager,
log.WithFields(logrus.Fields{"component": "llama.cpp"}),
"/Applications/Docker.app/Contents/Resources/bin",
llamaServerPath,
func() string { wd, _ := os.Getwd(); return wd }(),
)
if err != nil {
@@ -64,22 +76,34 @@ func main() {
router.Handle(route, scheduler)
}
if err := os.Remove(sockName); err != nil {
if !os.IsNotExist(err) {
log.Fatalf("Failed to remove existing socket: %v", err)
}
}
ln, err := net.ListenUnix("unix", &net.UnixAddr{Name: sockName, Net: "unix"})
if err != nil {
log.Fatalf("Failed to listen on socket: %v", err)
}
server := &http.Server{Handler: router}
serverErrors := make(chan error, 1)
go func() {
serverErrors <- server.Serve(ln)
}()
defer server.Close()
// Check if we should use TCP port instead of Unix socket
tcpPort := os.Getenv("MODEL_RUNNER_PORT")
if tcpPort != "" {
// Use TCP port
addr := ":" + tcpPort
log.Infof("Listening on TCP port %s", tcpPort)
server.Addr = addr
go func() {
serverErrors <- server.ListenAndServe()
}()
} else {
// Use Unix socket
if err := os.Remove(sockName); err != nil {
if !os.IsNotExist(err) {
log.Fatalf("Failed to remove existing socket: %v", err)
}
}
ln, err := net.ListenUnix("unix", &net.UnixAddr{Name: sockName, Net: "unix"})
if err != nil {
log.Fatalf("Failed to listen on socket: %v", err)
}
go func() {
serverErrors <- server.Serve(ln)
}()
}
schedulerErrors := make(chan error, 1)
go func() {

llamacpp backend (downloadLatestLlamaCpp)
@@ -33,6 +33,7 @@ var (
func (l *llamaCpp) downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string,
) error {
log.Infof("downloadLatestLlamaCpp: %s, %s, %s, %s", desiredVersion, desiredVariant, vendoredServerStoragePath, llamaCppPath)
desiredTag := desiredVersion + "-" + desiredVariant
url := fmt.Sprintf("https://hub.docker.com/v2/namespaces/%s/repositories/%s/tags/%s", hubNamespace, hubRepo, desiredTag)
resp, err := httpClient.Get(url)

llamacpp backend (ensureLatestLlamaCpp stub)
@@ -2,7 +2,6 @@ package llamacpp
import (
"context"
"errors"
"net/http"
"github.com/docker/model-runner/pkg/logging"
@@ -11,5 +10,5 @@ import (
func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
llamaCppPath, vendoredServerStoragePath string,
) error {
return errors.New("platform is not supported")
return errLlamaCppUpToDate
}

llamacpp backend (Install)
@@ -71,11 +71,9 @@ func (l *llamaCpp) UsesExternalModelManagement() bool {
func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error {
l.updatedLlamaCpp = false
// We don't currently support this backend on Windows or Linux. We'll likely
// We don't currently support this backend on Windows. We'll likely
// never support it on Intel Macs.
if runtime.GOOS == "linux" {
return errors.New("not implemented")
} else if (runtime.GOOS == "darwin" && runtime.GOARCH == "amd64") ||
if (runtime.GOOS == "darwin" && runtime.GOARCH == "amd64") ||
(runtime.GOOS == "windows" && !(runtime.GOARCH == "amd64" || runtime.GOARCH == "arm64")) {
return errors.New("platform not supported")
}