Merge pull request #123 from docker/nicks/chart
charts: add Kubernetes examples
This commit is contained in:
commit
6b1cfee5a3
|
|
@ -144,3 +144,11 @@ curl http://localhost:8080/metrics
|
|||
- **Monitoring integration**: Add the endpoint to your Prometheus configuration
|
||||
|
||||
Check [METRICS.md](./METRICS.md) for more details.
|
||||
|
||||
## Kubernetes
|
||||
|
||||
Experimental support for running in Kubernetes is available
|
||||
in the form of [a Helm chart and static YAML](charts/docker-model-runner/README.md).
|
||||
|
||||
If you are interested in a specific Kubernetes use-case, please start a
|
||||
discussion on the issue tracker.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
# Contributing
|
||||
|
||||
|
||||
## Using the Makefile
|
||||
|
||||
```bash
|
||||
# Render to plain Kubernetes YAML
|
||||
make render
|
||||
|
||||
# Install the chart
|
||||
make install
|
||||
|
||||
# Upgrade the chart
|
||||
make upgrade
|
||||
|
||||
# Uninstall the chart
|
||||
make uninstall
|
||||
```
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
apiVersion: v2
|
||||
name: docker-model-runner
|
||||
description: A Helm chart for Docker Model Runner
|
||||
type: application
|
||||
version: 0.1.0
|
||||
appVersion: "latest"
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
.PHONY: render clean install upgrade uninstall template lint package help
|
||||
|
||||
CHART_NAME := docker-model-runner
|
||||
RELEASE_NAME := docker-model-runner
|
||||
NAMESPACE := default
|
||||
|
||||
render:
|
||||
@echo "Rendering Helm chart to plain Kubernetes YAML..."
|
||||
mkdir -p static
|
||||
helm template $(RELEASE_NAME) . --namespace $(NAMESPACE) > static/docker-model-runner.yaml
|
||||
helm template $(RELEASE_NAME) . --namespace $(NAMESPACE) --set "nodePort.enabled=true" > static/docker-model-runner-desktop.yaml
|
||||
helm template $(RELEASE_NAME) . --namespace $(NAMESPACE) --set "modelInit.enabled=true" --set "modelInit.models[0]=ai/smollm2:latest" > static/docker-model-runner-smollm2.yaml
|
||||
helm template $(RELEASE_NAME) . --namespace $(NAMESPACE) --set "storage.storageClass=gp2" > static/docker-model-runner-eks.yaml
|
||||
@echo "Rendered YAML saved to static"
|
||||
|
||||
clean:
|
||||
@echo "Cleaning up rendered files..."
|
||||
rm -fR static
|
||||
|
||||
install:
|
||||
@echo "Installing Helm chart..."
|
||||
helm install $(RELEASE_NAME) . --namespace $(NAMESPACE) --create-namespace
|
||||
|
||||
upgrade:
|
||||
@echo "Upgrading Helm chart..."
|
||||
helm upgrade $(RELEASE_NAME) . --namespace $(NAMESPACE)
|
||||
|
||||
uninstall:
|
||||
@echo "Uninstalling Helm chart..."
|
||||
helm uninstall $(RELEASE_NAME) --namespace $(NAMESPACE)
|
||||
|
||||
template:
|
||||
@echo "Templating Helm chart..."
|
||||
helm template $(RELEASE_NAME) . --namespace $(NAMESPACE)
|
||||
|
||||
lint:
|
||||
@echo "Linting Helm chart..."
|
||||
helm lint .
|
||||
|
||||
package:
|
||||
@echo "Packaging Helm chart..."
|
||||
helm package .
|
||||
|
||||
# Print a one-line summary of every target defined in this Makefile.
# The `render` entry points at static/ because that is where the render
# recipe writes its output files (one per rendered variant).
help:
	@echo "Available targets:"
	@echo " render - Render Helm chart to plain Kubernetes YAML (saves to static/)"
	@echo " template - Template Helm chart (output to stdout)"
	@echo " lint - Lint Helm chart"
	@echo " package - Package Helm chart"
	@echo " install - Install Helm chart"
	@echo " upgrade - Upgrade Helm chart"
	@echo " uninstall - Uninstall Helm chart"
	@echo " clean - Clean up rendered files"
	@echo " help - Show this help message"
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
# Docker Model Runner Kubernetes Support
|
||||
|
||||
Manifests for deploying Docker Model Runner on Kubernetes with ephemeral storage, GPU support, and model pre-pulling capabilities.
|
||||
|
||||
## Quickstart
|
||||
|
||||
### On Docker Desktop
|
||||
|
||||
```
|
||||
kubectl apply -f static/docker-model-runner-desktop.yaml
|
||||
kubectl wait --for=condition=Available deployment/docker-model-runner --timeout=5m
|
||||
MODEL_RUNNER_HOST=http://localhost:31245 docker model run ai/smollm2:latest
|
||||
```
|
||||
|
||||
### On any Kubernetes Cluster
|
||||
|
||||
```
|
||||
kubectl apply -f static/docker-model-runner.yaml
|
||||
kubectl wait --for=condition=Available deployment/docker-model-runner --timeout=5m
|
||||
kubectl port-forward deployment/docker-model-runner 31245:12434
|
||||
```
|
||||
|
||||
Then:
|
||||
|
||||
```
|
||||
MODEL_RUNNER_HOST=http://localhost:31245 docker model run ai/smollm2:latest
|
||||
```
|
||||
|
||||
## Helm Configuration
|
||||
|
||||
### Basic Configuration
|
||||
|
||||
Key configuration options in `values.yaml`:
|
||||
|
||||
```yaml
|
||||
# Storage configuration
|
||||
storage:
|
||||
size: 100Gi
|
||||
storageClass: "" # Set this to the storage class of your cloud provider.
|
||||
|
||||
# Model pre-pull configuration
|
||||
modelInit:
|
||||
enabled: false
|
||||
models:
|
||||
- "ai/smollm2:latest"
|
||||
|
||||
# GPU configuration
|
||||
gpu:
|
||||
enabled: false
|
||||
vendor: nvidia # or amd
|
||||
count: 1
|
||||
|
||||
# NodePort configuration
|
||||
nodePort:
|
||||
enabled: false
|
||||
port: 31245
|
||||
```
|
||||
|
||||
### GPU Scheduling
|
||||
|
||||
To enable GPU scheduling:
|
||||
|
||||
```yaml
|
||||
gpu:
|
||||
enabled: true
|
||||
vendor: nvidia # or amd
|
||||
count: 1
|
||||
```
|
||||
|
||||
This will add the appropriate resource requests/limits:
|
||||
- NVIDIA: `nvidia.com/gpu`
|
||||
- AMD: `amd.com/gpu`
|
||||
|
||||
### Model Pre-pulling
|
||||
|
||||
Configure models to pre-pull during pod initialization:
|
||||
|
||||
```yaml
|
||||
modelInit:
|
||||
enabled: true
|
||||
models:
|
||||
- "ai/smollm2:latest"
|
||||
- "ai/llama3.2:latest"
|
||||
- "ai/mistral:latest"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Testing the Installation
|
||||
|
||||
Once installed, set up a port-forward to access the service:
|
||||
|
||||
```bash
|
||||
kubectl port-forward service/docker-model-runner 31245:80
|
||||
```
|
||||
|
||||
Then test the model runner:
|
||||
|
||||
```bash
|
||||
MODEL_RUNNER_HOST=http://localhost:31245 docker model run ai/smollm2:latest
|
||||
```
|
||||
|
||||
### Using with Open WebUI
|
||||
|
||||
To use Docker Model Runner with Open WebUI, install the Open WebUI Helm chart:
|
||||
|
||||
```bash
|
||||
# Add the Open WebUI Helm repository
|
||||
helm repo add open-webui https://helm.openwebui.com/
|
||||
helm repo update
|
||||
|
||||
# Install Open WebUI with auth disabled
|
||||
# See the open-webui Helm chart for
|
||||
# connecting to your auth provider.
|
||||
helm upgrade --install --wait open-webui open-webui/open-webui \
|
||||
--set ollama.enabled=false \
|
||||
--set pipelines.enabled=false \
|
||||
--set extraEnvVars[0].name="WEBUI_AUTH" \
|
||||
--set-string extraEnvVars[0].value=false \
|
||||
--set openaiBaseApiUrl="http://docker-model-runner/engines/v1"
|
||||
```
|
||||
|
||||
Access Open WebUI:
|
||||
|
||||
```bash
|
||||
kubectl port-forward service/open-webui 8080:80
|
||||
```
|
||||
|
||||
Then visit http://localhost:8080 in your browser.
|
||||
|
||||
## Values Reference
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `replicaCount` | Number of replicas | `1` |
|
||||
| `image.repository` | Docker Model Runner image repository | `docker/model-runner` |
|
||||
| `image.tag` | Docker Model Runner image tag | `latest` |
|
||||
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
|
||||
| `storage.size` | Ephemeral volume size | `100Gi` |
|
||||
| `storage.storageClass` | Storage class for ephemeral volume | `""` |
|
||||
| `modelInit.enabled` | Enable model pre-pulling | `false` |
|
||||
| `modelInit.models` | List of models to pre-pull | `[]` |
|
||||
| `gpu.enabled` | Enable GPU support | `false` |
|
||||
| `gpu.vendor` | GPU vendor (nvidia or amd) | `nvidia` |
|
||||
| `gpu.count` | Number of GPUs to request | `1` |
|
||||
| `nodePort.enabled` | Enable NodePort service | `false` |
|
||||
| `nodePort.port` | NodePort port number | `31245` |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Pod Fails to Start
|
||||
|
||||
Check the pod logs:
|
||||
|
||||
```bash
|
||||
kubectl logs -f deployment/docker-model-runner
|
||||
```
|
||||
|
||||
### Model Pre-pull Issues
|
||||
|
||||
Check the init container logs:
|
||||
|
||||
```bash
|
||||
kubectl logs -f deployment/docker-model-runner -c model-init
|
||||
```
|
||||
|
||||
### GPU Not Available
|
||||
|
||||
Your cluster must use [a GPU scheduling plugin](https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/).
|
||||
|
||||
Ensure your cluster has GPU support and the appropriate device plugin installed:
|
||||
|
||||
- For NVIDIA GPUs: Install the [NVIDIA device plugin](https://github.com/NVIDIA/k8s-device-plugin)
|
||||
- For AMD GPUs: Install the [AMD device plugin](https://github.com/ROCm/k8s-device-plugin#deployment)
|
||||
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
---
|
||||
# Source: docker-model-runner/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docker-model-runner
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 12434
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
---
|
||||
# Source: docker-model-runner/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docker-model-runner-nodeport
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: NodePort
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 12434
|
||||
nodePort: 31245
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
---
|
||||
# Source: docker-model-runner/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docker-model-runner
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
spec:
|
||||
initContainers:
|
||||
- name: changeowner
|
||||
image: busybox
|
||||
command: ["sh", "-c", "chmod a+rwx /models"]
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
|
||||
containers:
|
||||
- name: model-runner
|
||||
image: "docker/model-runner:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- containerPort: 12434
|
||||
env:
|
||||
- name: DMR_ORIGINS
|
||||
value: "http://localhost:31245,http://localhost:12434"
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 20
|
||||
failureThreshold: 3
|
||||
volumes:
|
||||
- name: model-storage
|
||||
ephemeral:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
resources:
|
||||
requests:
|
||||
storage: 100Gi
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
---
|
||||
# Source: docker-model-runner/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docker-model-runner
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 12434
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
---
|
||||
# Source: docker-model-runner/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docker-model-runner
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
spec:
|
||||
initContainers:
|
||||
- name: changeowner
|
||||
image: busybox
|
||||
command: ["sh", "-c", "chmod a+rwx /models"]
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
|
||||
containers:
|
||||
- name: model-runner
|
||||
image: "docker/model-runner:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- containerPort: 12434
|
||||
env:
|
||||
- name: DMR_ORIGINS
|
||||
value: "http://localhost:31245,http://localhost:12434"
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 20
|
||||
failureThreshold: 3
|
||||
volumes:
|
||||
- name: model-storage
|
||||
ephemeral:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
storageClassName: gp2
|
||||
resources:
|
||||
requests:
|
||||
storage: 100Gi
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
---
|
||||
# Source: docker-model-runner/templates/configmap.yaml
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: docker-model-runner-init
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
models: |
|
||||
ai/smollm2:latest
|
||||
---
|
||||
# Source: docker-model-runner/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docker-model-runner
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 12434
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
---
|
||||
# Source: docker-model-runner/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docker-model-runner
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
spec:
|
||||
initContainers:
|
||||
- name: changeowner
|
||||
image: busybox
|
||||
command: ["sh", "-c", "chmod a+rwx /models"]
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
|
||||
containers:
|
||||
- name: model-runner
|
||||
image: "docker/model-runner:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- containerPort: 12434
|
||||
env:
|
||||
- name: DMR_ORIGINS
|
||||
value: "http://localhost:31245,http://localhost:12434"
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 20
|
||||
failureThreshold: 3
|
||||
- name: model-init
|
||||
image: curlimages/curl:8.14.1
|
||||
command: ["/bin/sh", "-c"]
|
||||
args:
|
||||
- |
|
||||
set -ex
|
||||
MODEL_RUNNER=http://localhost:12434
|
||||
echo "Pre-pulling models..."
|
||||
while IFS= read -r model; do
|
||||
if [ -n "$model" ]; then
|
||||
echo "Pulling model: $model"
|
||||
curl -d "{\"from\": \"$model\"}" "$MODEL_RUNNER"/models/create
|
||||
fi
|
||||
done < /config/models
|
||||
echo "Model pre-pull complete"
|
||||
tail -f /dev/null
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
- name: init-config
|
||||
mountPath: /config
|
||||
volumes:
|
||||
- name: model-storage
|
||||
ephemeral:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
resources:
|
||||
requests:
|
||||
storage: 100Gi
|
||||
- name: init-config
|
||||
configMap:
|
||||
name: docker-model-runner-init
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
---
|
||||
# Source: docker-model-runner/templates/service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: docker-model-runner
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 12434
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
---
|
||||
# Source: docker-model-runner/templates/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: docker-model-runner
|
||||
labels:
|
||||
helm.sh/chart: docker-model-runner-0.1.0
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
app.kubernetes.io/version: "latest"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: docker-model-runner
|
||||
app.kubernetes.io/instance: docker-model-runner
|
||||
spec:
|
||||
initContainers:
|
||||
- name: changeowner
|
||||
image: busybox
|
||||
command: ["sh", "-c", "chmod a+rwx /models"]
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
|
||||
containers:
|
||||
- name: model-runner
|
||||
image: "docker/model-runner:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- containerPort: 12434
|
||||
env:
|
||||
- name: DMR_ORIGINS
|
||||
value: "http://localhost:31245,http://localhost:12434"
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 20
|
||||
failureThreshold: 3
|
||||
volumes:
|
||||
- name: model-storage
|
||||
ephemeral:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
resources:
|
||||
requests:
|
||||
storage: 100Gi
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "docker-model-runner.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "docker-model-runner.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "docker-model-runner.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "docker-model-runner.labels" -}}
|
||||
helm.sh/chart: {{ include "docker-model-runner.chart" . }}
|
||||
{{ include "docker-model-runner.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "docker-model-runner.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "docker-model-runner.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
GPU resource limits.

Renders a single extended-resource entry for the configured GPU vendor
(`nvidia.com/gpu` or `amd.com/gpu`) with `.Values.gpu.count` as its value.
Renders nothing when `.Values.gpu.enabled` is false.

Fails the render when GPU support is enabled with an unsupported vendor,
instead of emitting an empty block that would leave the caller's
`limits:` / `requests:` keys empty and request no GPU at all.
*/}}
{{- define "docker-model-runner.gpuResources" -}}
{{- if .Values.gpu.enabled }}
{{- if eq .Values.gpu.vendor "nvidia" }}
nvidia.com/gpu: {{ .Values.gpu.count }}
{{- else if eq .Values.gpu.vendor "amd" }}
amd.com/gpu: {{ .Values.gpu.count }}
{{- else }}
{{- fail (printf "gpu.vendor must be 'nvidia' or 'amd', got '%v'" .Values.gpu.vendor) }}
{{- end }}
{{- end }}
{{- end }}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
{{- if .Values.modelInit.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "docker-model-runner.fullname" . }}-init
|
||||
labels:
|
||||
{{- include "docker-model-runner.labels" . | nindent 4 }}
|
||||
data:
|
||||
models: |
|
||||
{{- range .Values.modelInit.models }}
|
||||
{{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "docker-model-runner.fullname" . }}
|
||||
labels:
|
||||
{{- include "docker-model-runner.labels" . | nindent 4 }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "docker-model-runner.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "docker-model-runner.selectorLabels" . | nindent 8 }}
|
||||
spec:
|
||||
initContainers:
|
||||
- name: changeowner
|
||||
image: busybox
|
||||
command: ["sh", "-c", "chmod a+rwx /models"]
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
|
||||
containers:
|
||||
- name: model-runner
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
ports:
|
||||
- containerPort: {{ .Values.service.targetPort }}
|
||||
env:
|
||||
- name: DMR_ORIGINS
|
||||
value: "http://localhost:{{ .Values.nodePort.port }},http://localhost:{{ .Values.service.targetPort }}"
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: {{ .Values.securityContext.allowPrivilegeEscalation }}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: {{ .Values.readinessProbe.httpGet.path }}
|
||||
port: {{ .Values.readinessProbe.httpGet.port }}
|
||||
initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
|
||||
periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
|
||||
failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: {{ .Values.livenessProbe.httpGet.path }}
|
||||
port: {{ .Values.livenessProbe.httpGet.port }}
|
||||
initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
|
||||
periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
|
||||
failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
|
||||
{{- if .Values.gpu.enabled }}
|
||||
resources:
|
||||
limits:
|
||||
{{- include "docker-model-runner.gpuResources" . | nindent 12 }}
|
||||
requests:
|
||||
{{- include "docker-model-runner.gpuResources" . | nindent 12 }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.modelInit.enabled }}
|
||||
- name: model-init
|
||||
image: curlimages/curl:8.14.1
|
||||
command: ["/bin/sh", "-c"]
|
||||
args:
|
||||
- |
|
||||
set -ex
|
||||
MODEL_RUNNER=http://localhost:{{ .Values.service.targetPort }}
|
||||
echo "Pre-pulling models..."
|
||||
while IFS= read -r model; do
|
||||
if [ -n "$model" ]; then
|
||||
echo "Pulling model: $model"
|
||||
curl -d "{\"from\": \"$model\"}" "$MODEL_RUNNER"/models/create
|
||||
fi
|
||||
done < /config/models
|
||||
echo "Model pre-pull complete"
|
||||
tail -f /dev/null
|
||||
volumeMounts:
|
||||
- name: model-storage
|
||||
mountPath: /models
|
||||
- name: init-config
|
||||
mountPath: /config
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: model-storage
|
||||
ephemeral:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
{{- if .Values.storage.storageClass }}
|
||||
storageClassName: {{ .Values.storage.storageClass }}
|
||||
{{- end }}
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.storage.size }}
|
||||
{{- if .Values.modelInit.enabled }}
|
||||
- name: init-config
|
||||
configMap:
|
||||
name: {{ include "docker-model-runner.fullname" . }}-init
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "docker-model-runner.fullname" . }}
|
||||
labels:
|
||||
{{- include "docker-model-runner.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: {{ .Values.service.targetPort }}
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
{{- include "docker-model-runner.selectorLabels" . | nindent 4 }}
|
||||
---
|
||||
{{- if .Values.nodePort.enabled }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "docker-model-runner.fullname" . }}-nodeport
|
||||
labels:
|
||||
{{- include "docker-model-runner.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: NodePort
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: {{ .Values.service.targetPort }}
|
||||
nodePort: {{ .Values.nodePort.port }}
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
{{- include "docker-model-runner.selectorLabels" . | nindent 4 }}
|
||||
{{- end }}
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
replicaCount: 1
|
||||
|
||||
image:
|
||||
repository: docker/model-runner
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "latest"
|
||||
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 80
|
||||
targetPort: 12434
|
||||
|
||||
nodePort:
|
||||
enabled: false
|
||||
port: 31245
|
||||
|
||||
# GPU resource allocation
|
||||
gpu:
|
||||
enabled: false
|
||||
# GPU vendor: nvidia or amd
|
||||
vendor: nvidia
|
||||
# Number of GPUs to request
|
||||
count: 1
|
||||
|
||||
# Ephemeral volume configuration
|
||||
storage:
|
||||
# Storage size for ephemeral volume
|
||||
size: 100Gi
|
||||
# Storage class for ephemeral volume
|
||||
storageClass: ""
|
||||
|
||||
# Model pre-pull configuration
|
||||
modelInit:
|
||||
enabled: false
|
||||
models: []
|
||||
|
||||
# Environment variables
# NOTE: templates/deployment.yaml currently builds DMR_ORIGINS from
# nodePort.port and service.targetPort and does not read this key. The
# default below is kept in sync with those values (31245 and 12434) so it
# matches what the chart actually renders.
env:
  DMR_ORIGINS: "http://localhost:31245,http://localhost:12434"
|
||||
|
||||
# Security context
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
|
||||
# Probes
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /engines/status
|
||||
port: 12434
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 20
|
||||
failureThreshold: 3
|
||||
|
||||
# Node selector
|
||||
nodeSelector: {}
|
||||
|
||||
# Tolerations
|
||||
tolerations: []
|
||||
|
||||
# Affinity
|
||||
affinity: {}
|
||||
Loading…
Reference in New Issue