Dockerize (#22)

* Adds Makefile for local development
* Fix chat completions example request
* Added delete example
* Dockerize model-runner
* WIP Run container with host access to socket
* Dockerize model-runner
* WIP Run container with host access to socket
* Debugging
* Run in Docker container with TCP port access
* mounted model storage
* - Remove duplication in .gitignore
  - Do not use alpine in builder image
  - NVIDIA seems to use Ubuntu in all of their CDI docs and produces Ubuntu tags for nvidia/cuda but not Debian. So use Ubuntu for our final image.
  For more details: https://github.com/docker/model-runner/pull/22
* - Add MODELS_PATH environment variable to configure model storage location
  - Default to $HOME/.docker/models when MODELS_PATH is not set
  - Update Docker container to use /models as the default storage path
  - Update Makefile to pass MODELS_PATH to container
  - Update Dockerfile to create and set permissions for /models directory
  This change allows users to:
  - Override the model storage location via MODELS_PATH
  - Maintain backward compatibility with default $HOME/.docker/models path
  - Use a more idiomatic folder for /models
* Removes unneeded logs
This commit is contained in:
parent 4239791795
commit dbbb7afe9f
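The MODELS_PATH behaviour described in the commit message reduces to a simple environment-variable fallback. The sketch below restates that lookup outside the full main.go context; it is illustrative only, and `resolveModelsPath` is a hypothetical helper name, not part of this change.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// resolveModelsPath returns the model storage root: MODELS_PATH if set,
// otherwise the backward-compatible default $HOME/.docker/models.
// (Hypothetical helper; the actual change inlines this logic in main.go.)
func resolveModelsPath() (string, error) {
	if p := os.Getenv("MODELS_PATH"); p != "" {
		return p, nil
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}
	return filepath.Join(home, ".docker", "models"), nil
}

func main() {
	path, err := resolveModelsPath()
	if err != nil {
		panic(err)
	}
	fmt.Println("model storage root:", path)
}
```

In the container the Makefile passes MODELS_PATH=/models, so the home-directory fallback is only hit when running the binary directly on the host.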
.dockerignore (new file)
@@ -0,0 +1,76 @@
# Version control
.git/
.gitignore

# IDE and editor files
.idea/
.vscode/
*.swp
*.swo

# Go build and cache artifacts
/vendor/
*.test
*.out
*.exe
*.dll
*.so
*.dylib
*.a
*.o
*.obj
*.cgo*
*.coverprofile
*.prof
*.tmp
*.log

# Go tool cache
/go-build/
/go-cache/

# Test and development artifacts
test/
tests/
*_test.go

# Build outputs
dist/
build/
out/
debug/

# Temporary and local files
tmp/
temp/
.local/
local/

# Environment and secrets
.env*
*.env
*.pem
*.key
*.crt
config.local.*
*.local.yml

# Documentation and markdown
docs/
*.md
README*
LICENSE

# Docker files
Dockerfile*

# Miscellaneous
*.bak
*.old
*.orig
*.rej
*.DS_Store
.Spotlight-V100
.Trashes

# Exclude nothing else; keep source code and necessary files for build
Dockerfile (new file)
@@ -0,0 +1,63 @@
# syntax=docker/dockerfile:1

ARG GO_VERSION=1.24.2
ARG LLAMA_SERVER_VERSION=latest
ARG LLAMA_BINARY_PATH=/com.docker.llama-server.native.linux.cpu.amd64

FROM golang:${GO_VERSION}-bookworm AS builder

# Install git for go mod download if needed
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy go mod/sum first for better caching
COPY --link go.mod go.sum ./

# Download dependencies (with cache mounts)
RUN --mount=type=cache,target=/go/pkg/mod \
    --mount=type=cache,target=/root/.cache/go-build \
    go mod download

# Copy the rest of the source code
COPY --link . .

# Build the Go binary (static build)
RUN --mount=type=cache,target=/go/pkg/mod \
    --mount=type=cache,target=/root/.cache/go-build \
    CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go

# --- Get llama.cpp binary ---
FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION} AS llama-server

# --- Final image ---
FROM ubuntu:24.04 AS final

# Create non-root user
RUN groupadd --system modelrunner && useradd --system --gid modelrunner modelrunner

# Install ca-certificates for HTTPS
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Create directories for the socket file and llama.cpp binary, and set proper permissions
RUN mkdir -p /var/run/model-runner /app/bin /models && \
    chown -R modelrunner:modelrunner /var/run/model-runner /app/bin /models && \
    chmod -R 755 /models

# Copy the built binary from builder
COPY --from=builder /app/model-runner /app/model-runner

# Copy the llama.cpp binary from the llama-server stage
ARG LLAMA_BINARY_PATH
COPY --from=llama-server ${LLAMA_BINARY_PATH} /app/bin/com.docker.llama-server

USER modelrunner

# Set the environment variable for the socket path and LLaMA server binary path
ENV MODEL_RUNNER_SOCK=/var/run/model-runner/model-runner.sock
ENV LLAMA_SERVER_PATH=/app/bin
ENV HOME=/home/modelrunner

ENTRYPOINT ["/app/model-runner"]
Makefile (49)
@@ -1,9 +1,17 @@
# Project variables
APP_NAME := model-runner
GO_VERSION := 1.23.7
LLAMA_SERVER_VERSION := v0.0.4-rc2-cpu
TARGET_OS := linux
TARGET_ARCH := amd64
ACCEL := cpu
DOCKER_IMAGE := go-model-runner:latest
LLAMA_BINARY := /com.docker.llama-server.native.$(TARGET_OS).$(ACCEL).$(TARGET_ARCH)
PORT := 8080
MODELS_PATH := $(shell pwd)/models

# Main targets
.PHONY: build run clean help
.PHONY: build run clean test docker-build docker-run help

# Default target
.DEFAULT_GOAL := help

@@ -20,11 +28,42 @@ run: build
clean:
	rm -f $(APP_NAME)
	rm -f model-runner.sock
	rm -rf $(MODELS_PATH)

# Run tests
test:
	go test -v ./...

# Build Docker image
docker-build:
	docker build --platform linux/amd64 \
		--build-arg LLAMA_SERVER_VERSION=$(LLAMA_SERVER_VERSION) \
		--build-arg LLAMA_BINARY_PATH=$(LLAMA_BINARY) \
		-t $(DOCKER_IMAGE) .

# Run in Docker container with TCP port access and mounted model storage
docker-run: docker-build
	@echo ""
	@echo "Starting service on port $(PORT) with model storage at $(MODELS_PATH)..."
	@echo "Service will be available at: http://localhost:$(PORT)"
	@echo "Example usage: curl http://localhost:$(PORT)/models"
	@echo ""
	mkdir -p $(MODELS_PATH)
	docker run --rm \
		-p $(PORT):$(PORT) \
		-v "$(MODELS_PATH):/models" \
		-e MODEL_RUNNER_PORT=$(PORT) \
		-e LLAMA_SERVER_PATH=/app/bin \
		-e MODELS_PATH=/models \
		$(DOCKER_IMAGE)

# Show help
help:
	@echo "Available targets:"
	@echo " build - Build the Go application"
	@echo " run - Run the application locally"
	@echo " clean - Clean build artifacts"
	@echo " help - Show this help message"
	@echo " build - Build the Go application"
	@echo " run - Run the application locally"
	@echo " clean - Clean build artifacts"
	@echo " test - Run tests"
	@echo " docker-build - Build Docker image"
	@echo " docker-run - Run in Docker container with TCP port access and mounted model storage"
	@echo " help - Show this help message"
README.md (92)
@@ -23,61 +23,80 @@ The Makefile provides the following targets:
- `build` - Build the Go application
- `run` - Run the application locally
- `clean` - Clean build artifacts
- `test` - Run tests
- `docker-build` - Build the Docker image
- `docker-run` - Run the application in a Docker container with TCP port access and mounted model storage
- `help` - Show available targets

### Examples
### Running in Docker

The application can be run in Docker with the following features enabled by default:
- TCP port access (default port 8080)
- Persistent model storage in a local `models` directory

```sh
# Build the application
make build
# Run with default settings
make docker-run

# Run the application locally
make run

# Show all available targets
make help
# Customize port and model storage location
make docker-run PORT=3000 MODELS_PATH=/path/to/your/models
```

This will:
- Create a `models` directory in your current working directory (or use the specified path)
- Mount this directory into the container
- Start the service on port 8080 (or the specified port)
- All models downloaded will be stored in the host's `models` directory and will persist between container runs

### llama.cpp integration

The Docker image includes the llama.cpp server binary from the `docker/docker-model-backend-llamacpp` image. You can specify the version of the image to use by setting the `LLAMA_SERVER_VERSION` variable. Additionally, you can configure the target OS, architecture, and acceleration type:

```sh
# Build with a specific llama.cpp server version
LLAMA_SERVER_VERSION=v0.0.4-rc2-cpu make docker-build

# Specify all parameters
LLAMA_SERVER_VERSION=v0.0.4-rc2-cpu TARGET_OS=linux TARGET_ARCH=amd64 ACCEL=cpu make docker-build
```

Default values:
- `LLAMA_SERVER_VERSION`: v0.0.4-rc2-cpu
- `TARGETOS`: linux
- `TARGETARCH`: amd64
- `ACCEL`: cpu

The binary path in the image follows this pattern: `/com.docker.llama-server.native.${TARGETOS}.${ACCEL}.${TARGETARCH}`

## API Examples

The Model Runner exposes a REST API over a Unix socket. You can interact with it using curl commands with the `--unix-socket` option.
The Model Runner exposes a REST API that can be accessed via TCP port. You can interact with it using curl commands.

### Listing Models
### Using the API

To list all available models:
When running with `docker-run`, you can use regular HTTP requests:

```sh
curl --unix-socket model-runner.sock localhost/models
```
# List all available models
curl http://localhost:8080/models

### Creating a Model
# Create a new model
curl http://localhost:8080/models/create -X POST -d '{"from": "ai/smollm2"}'

To create a new model:
# Get information about a specific model
curl http://localhost:8080/models/ai/smollm2

```sh
curl --unix-socket model-runner.sock localhost/models/create -X POST -d '{"from": "ai/smollm2"}'
```

### Getting Model Information

To get information about a specific model:

```sh
curl --unix-socket model-runner.sock localhost/models/ai/smollm2
```

### Chatting with a Model

To chat with a model, you can send a POST request to the model's chat endpoint:

```sh
curl --unix-socket model-runner.sock localhost/engines/llama.cpp/v1/chat/completions -X POST -d '{
# Chat with a model
curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
  "model": "ai/smollm2",
  "messages": [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello, how are you?"}
  ]
}'

# Delete a model
curl http://localhost:8080/models/ai/smollm2 -X DELETE
```

The response will contain the model's reply:

@@ -105,10 +124,3 @@ The response will contain the model's reply:
  }
}
```

### Deleting a model
To delete a model from the server, send a DELETE request to the model's endpoint:

```sh
curl --unix-socket model-runner.sock localhost/models/ai/smollm2 -X DELETE
```
go.mod (2)
@@ -5,7 +5,7 @@ go 1.23.7
require (
	github.com/containerd/containerd/v2 v2.0.4
	github.com/containerd/platforms v1.0.0-rc.1
	github.com/docker/model-distribution v0.0.0-20250418222450-8f6c05a5ffa4
	github.com/docker/model-distribution v0.0.0-20250423075433-587f475e591d
	github.com/jaypipes/ghw v0.16.0
	github.com/opencontainers/go-digest v1.0.0
	github.com/opencontainers/image-spec v1.1.1
go.sum (4)
@@ -39,6 +39,8 @@ github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZ
github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M=
github.com/docker/model-distribution v0.0.0-20250418222450-8f6c05a5ffa4 h1:v7DyBUd08t4XQe4NC2Rb8ZSWOiWVXD0TbdgIOQZ/IQo=
github.com/docker/model-distribution v0.0.0-20250418222450-8f6c05a5ffa4/go.mod h1:fd9H/2KAY0+ByxbohWfCkxp0rxf0pqSlHEtFTjTXbLk=
github.com/docker/model-distribution v0.0.0-20250423075433-587f475e591d h1:lyxdxjNHTSyQ2w1rjbuu5pbgX42AD3kmxWLNv3mdqQ4=
github.com/docker/model-distribution v0.0.0-20250423075433-587f475e591d/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=

@@ -59,6 +61,8 @@ github.com/google/go-containerregistry v0.20.3 h1:oNx7IdTI936V8CQRveCjaxOiegWwvM
github.com/google/go-containerregistry v0.20.3/go.mod h1:w00pIgBRDVUDFM6bq+Qx8lwNWK+cxgCuX1vd3PIBDNI=
github.com/gpustack/gguf-parser-go v0.14.0 h1:9kMdbJ9pHhvEeATx163WE9q+O74bi05KPJLEtVBUjw0=
github.com/gpustack/gguf-parser-go v0.14.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
github.com/gpustack/gguf-parser-go v0.14.1 h1:tmz2eTnSEFfE52V10FESqo9oAUquZ6JKQFntWC/wrEg=
github.com/gpustack/gguf-parser-go v0.14.1/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
github.com/jaypipes/ghw v0.16.0 h1:3HurCTS38VNpeQLo5fIdZsySuo/qAfpPSJ5t05QBFPM=
main.go (56)
@@ -32,16 +32,28 @@ func main() {
		log.Fatalf("Failed to get user home directory: %v", err)
	}

	modelPath := os.Getenv("MODELS_PATH")
	if modelPath == "" {
		modelPath = filepath.Join(userHomeDir, ".docker", "models")
	}

	modelManager := models.NewManager(log, models.ClientConfig{
		StoreRootPath: filepath.Join(userHomeDir, ".docker", "models"),
		StoreRootPath: modelPath,
		Logger:        log.WithFields(logrus.Fields{"component": "model-manager"}),
	})

	llamaServerPath := os.Getenv("LLAMA_SERVER_PATH")
	if llamaServerPath == "" {
		llamaServerPath = "/Applications/Docker.app/Contents/Resources/bin"
	}

	log.Infof("LLAMA_SERVER_PATH: %s", llamaServerPath)

	llamaCppBackend, err := llamacpp.New(
		log,
		modelManager,
		log.WithFields(logrus.Fields{"component": "llama.cpp"}),
		"/Applications/Docker.app/Contents/Resources/bin",
		llamaServerPath,
		func() string { wd, _ := os.Getwd(); return wd }(),
	)
	if err != nil {

@@ -64,22 +76,34 @@ func main() {
		router.Handle(route, scheduler)
	}

	if err := os.Remove(sockName); err != nil {
		if !os.IsNotExist(err) {
			log.Fatalf("Failed to remove existing socket: %v", err)
		}
	}
	ln, err := net.ListenUnix("unix", &net.UnixAddr{Name: sockName, Net: "unix"})
	if err != nil {
		log.Fatalf("Failed to listen on socket: %v", err)
	}

	server := &http.Server{Handler: router}
	serverErrors := make(chan error, 1)
	go func() {
		serverErrors <- server.Serve(ln)
	}()
	defer server.Close()

	// Check if we should use TCP port instead of Unix socket
	tcpPort := os.Getenv("MODEL_RUNNER_PORT")
	if tcpPort != "" {
		// Use TCP port
		addr := ":" + tcpPort
		log.Infof("Listening on TCP port %s", tcpPort)
		server.Addr = addr
		go func() {
			serverErrors <- server.ListenAndServe()
		}()
	} else {
		// Use Unix socket
		if err := os.Remove(sockName); err != nil {
			if !os.IsNotExist(err) {
				log.Fatalf("Failed to remove existing socket: %v", err)
			}
		}
		ln, err := net.ListenUnix("unix", &net.UnixAddr{Name: sockName, Net: "unix"})
		if err != nil {
			log.Fatalf("Failed to listen on socket: %v", err)
		}
		go func() {
			serverErrors <- server.Serve(ln)
		}()
	}

	schedulerErrors := make(chan error, 1)
	go func() {
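With the main.go change above, the server listens on TCP when MODEL_RUNNER_PORT is set and otherwise keeps the original Unix-socket listener. Talking to the socket from Go needs a custom dialer; the following is a minimal, illustrative sketch, where the `model-runner.sock` path mirrors the README examples and is an assumption about where the socket lives:

```go
package main

import (
	"context"
	"fmt"
	"io"
	"net"
	"net/http"
)

func main() {
	// Route every request of this client through the runner's Unix socket
	// instead of a TCP address. "model-runner.sock" matches the path used
	// in the README's --unix-socket examples (assumption, not enforced here).
	client := &http.Client{
		Transport: &http.Transport{
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				return (&net.Dialer{}).DialContext(ctx, "unix", "model-runner.sock")
			},
		},
	}

	// The host part is ignored when dialing a Unix socket; "localhost" is a placeholder.
	resp, err := client.Get("http://localhost/models")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
```

When MODEL_RUNNER_PORT is set, as the Makefile's docker-run target does, the same request is simply an HTTP GET against http://localhost:8080/models.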
@@ -33,6 +33,7 @@ var (
func (l *llamaCpp) downloadLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
	llamaCppPath, vendoredServerStoragePath, desiredVersion, desiredVariant string,
) error {
	log.Infof("downloadLatestLlamaCpp: %s, %s, %s, %s", desiredVersion, desiredVariant, vendoredServerStoragePath, llamaCppPath)
	desiredTag := desiredVersion + "-" + desiredVariant
	url := fmt.Sprintf("https://hub.docker.com/v2/namespaces/%s/repositories/%s/tags/%s", hubNamespace, hubRepo, desiredTag)
	resp, err := httpClient.Get(url)
@@ -2,7 +2,6 @@ package llamacpp

import (
	"context"
	"errors"
	"net/http"

	"github.com/docker/model-runner/pkg/logging"

@@ -11,5 +10,5 @@ import (
func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
	llamaCppPath, vendoredServerStoragePath string,
) error {
	return errors.New("platform is not supported")
	return errLlamaCppUpToDate
}
@@ -71,11 +71,9 @@ func (l *llamaCpp) UsesExternalModelManagement() bool {
func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error {
	l.updatedLlamaCpp = false

	// We don't currently support this backend on Windows or Linux. We'll likely
	// We don't currently support this backend on Windows. We'll likely
	// never support it on Intel Macs.
	if runtime.GOOS == "linux" {
		return errors.New("not implemented")
	} else if (runtime.GOOS == "darwin" && runtime.GOARCH == "amd64") ||
	if (runtime.GOOS == "darwin" && runtime.GOARCH == "amd64") ||
		(runtime.GOOS == "windows" && !(runtime.GOARCH == "amd64" || runtime.GOARCH == "arm64")) {
		return errors.New("platform not supported")
	}