Merge pull request #123 from docker/nicks/chart

charts: add Kubernetes examples
This commit is contained in:
Jacob Howard 2025-07-30 10:55:53 +03:00 committed by GitHub
commit 6b1cfee5a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 983 additions and 0 deletions

View File

@ -144,3 +144,11 @@ curl http://localhost:8080/metrics
- **Monitoring integration**: Add the endpoint to your Prometheus configuration
Check [METRICS.md](./METRICS.md) for more details.
## Kubernetes
Experimental support for running in Kubernetes is available
in the form of [a Helm chart and static YAML](charts/docker-model-runner/README).
If you are interested in a specific Kubernetes use-case, please start a
discussion on the issue tracker.

View File

@ -0,0 +1,18 @@
# Contributing
## Using the Makefile
```bash
# Render to plain Kubernetes YAML
make render
# Install the chart
make install
# Upgrade the chart
make upgrade
# Uninstall the chart
make uninstall
```

View File

@ -0,0 +1,6 @@
# Chart metadata for the Docker Model Runner Helm chart.
apiVersion: v2
name: docker-model-runner
description: A Helm chart for Docker Model Runner
# "application" chart (deployable workload), as opposed to a "library" chart.
type: application
# Chart version; bump this on any change to templates or values.
version: 0.1.0
# Version of the deployed application; "latest" tracks the newest image tag.
appVersion: "latest"

View File

@ -0,0 +1,54 @@
# Makefile for the docker-model-runner Helm chart.
# Override variables on the command line, e.g.: make install NAMESPACE=models

.PHONY: render clean install upgrade uninstall template lint package help

# NOTE(review): CHART_NAME is currently unused by any target; kept for future use.
CHART_NAME := docker-model-runner
RELEASE_NAME := docker-model-runner
NAMESPACE := default

# Render the chart to plain Kubernetes YAML under static/ for each supported
# deployment variant (default, Docker Desktop NodePort, smollm2 pre-pull, EKS).
render:
	@echo "Rendering Helm chart to plain Kubernetes YAML..."
	mkdir -p static
	helm template $(RELEASE_NAME) . --namespace $(NAMESPACE) > static/docker-model-runner.yaml
	helm template $(RELEASE_NAME) . --namespace $(NAMESPACE) --set "nodePort.enabled=true" > static/docker-model-runner-desktop.yaml
	helm template $(RELEASE_NAME) . --namespace $(NAMESPACE) --set "modelInit.enabled=true" --set "modelInit.models[0]=ai/smollm2:latest" > static/docker-model-runner-smollm2.yaml
	helm template $(RELEASE_NAME) . --namespace $(NAMESPACE) --set "storage.storageClass=gp2" > static/docker-model-runner-eks.yaml
	@echo "Rendered YAML saved to static"

# Remove all rendered output.
clean:
	@echo "Cleaning up rendered files..."
	rm -fR static

# Install the chart as release $(RELEASE_NAME), creating the namespace if needed.
install:
	@echo "Installing Helm chart..."
	helm install $(RELEASE_NAME) . --namespace $(NAMESPACE) --create-namespace

# Upgrade an existing release in place.
upgrade:
	@echo "Upgrading Helm chart..."
	helm upgrade $(RELEASE_NAME) . --namespace $(NAMESPACE)

# Remove the release from the cluster.
uninstall:
	@echo "Uninstalling Helm chart..."
	helm uninstall $(RELEASE_NAME) --namespace $(NAMESPACE)

# Template the chart to stdout (no files written).
template:
	@echo "Templating Helm chart..."
	helm template $(RELEASE_NAME) . --namespace $(NAMESPACE)

lint:
	@echo "Linting Helm chart..."
	helm lint .

package:
	@echo "Packaging Helm chart..."
	helm package .

help:
	@echo "Available targets:"
	@echo "  render    - Render Helm chart to plain Kubernetes YAML (saves to static/)"
	@echo "  template  - Template Helm chart (output to stdout)"
	@echo "  lint      - Lint Helm chart"
	@echo "  package   - Package Helm chart"
	@echo "  install   - Install Helm chart"
	@echo "  upgrade   - Upgrade Helm chart"
	@echo "  uninstall - Uninstall Helm chart"
	@echo "  clean     - Clean up rendered files"
	@echo "  help      - Show this help message"

View File

@ -0,0 +1,175 @@
# Docker Model Runner Kubernetes Support
Manifests for deploying Docker Model Runner on Kubernetes with ephemeral storage, GPU support, and model pre-pulling capabilities.
## Quickstart
### On Docker Desktop
```
kubectl apply -f static/docker-model-runner-desktop.yaml
kubectl wait --for=condition=Available deployment/docker-model-runner --timeout=5m
MODEL_RUNNER_HOST=http://localhost:31245 docker model run ai/smollm2:latest
```
### On any Kubernetes Cluster
```
kubectl apply -f static/docker-model-runner.yaml
kubectl wait --for=condition=Available deployment/docker-model-runner --timeout=5m
kubectl port-forward deployment/docker-model-runner 31245:12434
```
Then:
```
MODEL_RUNNER_HOST=http://localhost:31245 docker model run ai/smollm2:latest
```
## Helm Configuration
### Basic Configuration
Key configuration options in `values.yaml`:
```yaml
# Storage configuration
storage:
size: 100Gi
storageClass: "" # Set this to the storage class of your cloud provider.
# Model pre-pull configuration
modelInit:
enabled: false
models:
- "ai/smollm2:latest"
# GPU configuration
gpu:
enabled: false
vendor: nvidia # or amd
count: 1
# NodePort configuration
nodePort:
enabled: false
port: 31245
```
### GPU Scheduling
To enable GPU scheduling:
```yaml
gpu:
enabled: true
vendor: nvidia # or amd
count: 1
```
This will add the appropriate resource requests/limits:
- NVIDIA: `nvidia.com/gpu`
- AMD: `amd.com/gpu`
### Model Pre-pulling
Configure models to pre-pull during pod initialization:
```yaml
modelInit:
enabled: true
models:
- "ai/smollm2:latest"
- "ai/llama3.2:latest"
- "ai/mistral:latest"
```
## Usage
### Testing the Installation
Once installed, set up a port-forward to access the service:
```bash
kubectl port-forward service/docker-model-runner-nodeport 31245:80
```
Then test the model runner:
```bash
MODEL_RUNNER_HOST=http://localhost:31245 docker model run ai/smollm2:latest
```
### Using with Open WebUI
To use Docker Model Runner with Open WebUI, install the Open WebUI Helm chart:
```bash
# Add the Open WebUI Helm repository
helm repo add open-webui https://helm.openwebui.com/
helm repo update
# Install Open WebUI with auth disabled
# See the open-webui Helm chart for
# connecting to your auth provider.
helm upgrade --install --wait open-webui open-webui/open-webui \
--set ollama.enabled=false \
--set pipelines.enabled=false \
--set extraEnvVars[0].name="WEBUI_AUTH" \
--set-string extraEnvVars[0].value=false \
--set openaiBaseApiUrl="http://docker-model-runner/engines/v1"
```
Access Open WebUI:
```bash
kubectl port-forward service/open-webui 8080:80
```
Then visit http://localhost:8080 in your browser.
## Values Reference
| Parameter | Description | Default |
|-----------|-------------|---------|
| `replicaCount` | Number of replicas | `1` |
| `image.repository` | Docker Model Runner image repository | `docker/model-runner` |
| `image.tag` | Docker Model Runner image tag | `latest` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `storage.size` | Ephemeral volume size | `100Gi` |
| `storage.storageClass` | Storage class for ephemeral volume | `""` |
| `modelInit.enabled` | Enable model pre-pulling | `false` |
| `modelInit.models` | List of models to pre-pull | `["ai/smollm2:latest"]` |
| `gpu.enabled` | Enable GPU support | `false` |
| `gpu.vendor` | GPU vendor (nvidia or amd) | `nvidia` |
| `gpu.count` | Number of GPUs to request | `1` |
| `nodePort.enabled` | Enable NodePort service | `false` |
| `nodePort.port` | NodePort port number | `31245` |
## Troubleshooting
### Pod Fails to Start
Check the pod logs:
```bash
kubectl logs -f deployment/docker-model-runner
```
### Model Pre-pull Issues
Check the init container logs:
```bash
kubectl logs -f deployment/docker-model-runner -c model-init
```
### GPU Not Available
Your cluster must use [a GPU scheduling plugin](https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/).
Ensure your cluster has GPU support and the appropriate device plugin installed:
- For NVIDIA GPUs: Install the [NVIDIA device plugin](https://github.com/NVIDIA/k8s-device-plugin)
- For AMD GPUs: Install the [AMD device plugin](https://github.com/ROCm/k8s-device-plugin#deployment)

View File

@ -0,0 +1,114 @@
---
# Source: docker-model-runner/templates/service.yaml
# Rendered output (Docker Desktop variant: ClusterIP + NodePort services).
# Regenerate with `make render`; do not edit by hand.
apiVersion: v1
kind: Service
metadata:
  name: docker-model-runner
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
  - port: 80
    targetPort: 12434
    protocol: TCP
    name: http
  selector:
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
---
# Source: docker-model-runner/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: docker-model-runner-nodeport
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  type: NodePort
  ports:
  - port: 80
    targetPort: 12434
    nodePort: 31245
    protocol: TCP
    name: http
  selector:
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
---
# Source: docker-model-runner/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: docker-model-runner
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: docker-model-runner
      app.kubernetes.io/instance: docker-model-runner
  template:
    metadata:
      labels:
        app.kubernetes.io/name: docker-model-runner
        app.kubernetes.io/instance: docker-model-runner
    spec:
      initContainers:
      # Make the model store writable before the runner starts.
      - name: changeowner
        image: busybox
        command: ["sh", "-c", "chmod a+rwx /models"]
        volumeMounts:
        - name: model-storage
          mountPath: /models
      containers:
      - name: model-runner
        image: "docker/model-runner:latest"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 12434
        env:
        - name: DMR_ORIGINS
          value: "http://localhost:31245,http://localhost:12434"
        volumeMounts:
        - name: model-storage
          mountPath: /models
        securityContext:
          allowPrivilegeEscalation: false
        readinessProbe:
          httpGet:
            path: /engines/status
            port: 12434
          initialDelaySeconds: 5
          periodSeconds: 10
          failureThreshold: 3
        livenessProbe:
          httpGet:
            path: /engines/status
            port: 12434
          initialDelaySeconds: 15
          periodSeconds: 20
          failureThreshold: 3
      volumes:
      # Generic ephemeral volume: model cache lives and dies with the pod.
      - name: model-storage
        ephemeral:
          volumeClaimTemplate:
            spec:
              accessModes: [ "ReadWriteOnce" ]
              resources:
                requests:
                  storage: 100Gi

View File

@ -0,0 +1,92 @@
---
# Source: docker-model-runner/templates/service.yaml
# Rendered output (EKS variant: storageClassName gp2).
# Regenerate with `make render`; do not edit by hand.
apiVersion: v1
kind: Service
metadata:
  name: docker-model-runner
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
  - port: 80
    targetPort: 12434
    protocol: TCP
    name: http
  selector:
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
---
# Source: docker-model-runner/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: docker-model-runner
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: docker-model-runner
      app.kubernetes.io/instance: docker-model-runner
  template:
    metadata:
      labels:
        app.kubernetes.io/name: docker-model-runner
        app.kubernetes.io/instance: docker-model-runner
    spec:
      initContainers:
      # Make the model store writable before the runner starts.
      - name: changeowner
        image: busybox
        command: ["sh", "-c", "chmod a+rwx /models"]
        volumeMounts:
        - name: model-storage
          mountPath: /models
      containers:
      - name: model-runner
        image: "docker/model-runner:latest"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 12434
        env:
        - name: DMR_ORIGINS
          value: "http://localhost:31245,http://localhost:12434"
        volumeMounts:
        - name: model-storage
          mountPath: /models
        securityContext:
          allowPrivilegeEscalation: false
        readinessProbe:
          httpGet:
            path: /engines/status
            port: 12434
          initialDelaySeconds: 5
          periodSeconds: 10
          failureThreshold: 3
        livenessProbe:
          httpGet:
            path: /engines/status
            port: 12434
          initialDelaySeconds: 15
          periodSeconds: 20
          failureThreshold: 3
      volumes:
      # Generic ephemeral volume backed by the EKS gp2 storage class.
      - name: model-storage
        ephemeral:
          volumeClaimTemplate:
            spec:
              accessModes: [ "ReadWriteOnce" ]
              storageClassName: gp2
              resources:
                requests:
                  storage: 100Gi

View File

@ -0,0 +1,130 @@
---
# Source: docker-model-runner/templates/configmap.yaml
# Rendered output (smollm2 variant: model pre-pull enabled).
# Regenerate with `make render`; do not edit by hand.
apiVersion: v1
kind: ConfigMap
metadata:
  name: docker-model-runner-init
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
data:
  # Newline-separated list of models consumed by the model-init container.
  models: |
    ai/smollm2:latest
---
# Source: docker-model-runner/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: docker-model-runner
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
  - port: 80
    targetPort: 12434
    protocol: TCP
    name: http
  selector:
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
---
# Source: docker-model-runner/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: docker-model-runner
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: docker-model-runner
      app.kubernetes.io/instance: docker-model-runner
  template:
    metadata:
      labels:
        app.kubernetes.io/name: docker-model-runner
        app.kubernetes.io/instance: docker-model-runner
    spec:
      initContainers:
      # Make the model store writable before the runner starts.
      - name: changeowner
        image: busybox
        command: ["sh", "-c", "chmod a+rwx /models"]
        volumeMounts:
        - name: model-storage
          mountPath: /models
      containers:
      - name: model-runner
        image: "docker/model-runner:latest"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 12434
        env:
        - name: DMR_ORIGINS
          value: "http://localhost:31245,http://localhost:12434"
        volumeMounts:
        - name: model-storage
          mountPath: /models
        securityContext:
          allowPrivilegeEscalation: false
        readinessProbe:
          httpGet:
            path: /engines/status
            port: 12434
          initialDelaySeconds: 5
          periodSeconds: 10
          failureThreshold: 3
        livenessProbe:
          httpGet:
            path: /engines/status
            port: 12434
          initialDelaySeconds: 15
          periodSeconds: 20
          failureThreshold: 3
      # Sidecar that pre-pulls each model listed in the init ConfigMap via the
      # runner's local API, then idles so the pod stays Ready.
      - name: model-init
        image: curlimages/curl:8.14.1
        command: ["/bin/sh", "-c"]
        args:
        - |
          set -ex
          MODEL_RUNNER=http://localhost:12434
          echo "Pre-pulling models..."
          while IFS= read -r model; do
            if [ -n "$model" ]; then
              echo "Pulling model: $model"
              curl -d "{\"from\": \"$model\"}" "$MODEL_RUNNER"/models/create
            fi
          done < /config/models
          echo "Model pre-pull complete"
          tail -f /dev/null
        volumeMounts:
        - name: model-storage
          mountPath: /models
        - name: init-config
          mountPath: /config
      volumes:
      # Generic ephemeral volume: model cache lives and dies with the pod.
      - name: model-storage
        ephemeral:
          volumeClaimTemplate:
            spec:
              accessModes: [ "ReadWriteOnce" ]
              resources:
                requests:
                  storage: 100Gi
      - name: init-config
        configMap:
          name: docker-model-runner-init

View File

@ -0,0 +1,91 @@
---
# Source: docker-model-runner/templates/service.yaml
# Rendered output (default variant).
# Regenerate with `make render`; do not edit by hand.
apiVersion: v1
kind: Service
metadata:
  name: docker-model-runner
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
  - port: 80
    targetPort: 12434
    protocol: TCP
    name: http
  selector:
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
---
# Source: docker-model-runner/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: docker-model-runner
  labels:
    helm.sh/chart: docker-model-runner-0.1.0
    app.kubernetes.io/name: docker-model-runner
    app.kubernetes.io/instance: docker-model-runner
    app.kubernetes.io/version: "latest"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: docker-model-runner
      app.kubernetes.io/instance: docker-model-runner
  template:
    metadata:
      labels:
        app.kubernetes.io/name: docker-model-runner
        app.kubernetes.io/instance: docker-model-runner
    spec:
      initContainers:
      # Make the model store writable before the runner starts.
      - name: changeowner
        image: busybox
        command: ["sh", "-c", "chmod a+rwx /models"]
        volumeMounts:
        - name: model-storage
          mountPath: /models
      containers:
      - name: model-runner
        image: "docker/model-runner:latest"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 12434
        env:
        - name: DMR_ORIGINS
          value: "http://localhost:31245,http://localhost:12434"
        volumeMounts:
        - name: model-storage
          mountPath: /models
        securityContext:
          allowPrivilegeEscalation: false
        readinessProbe:
          httpGet:
            path: /engines/status
            port: 12434
          initialDelaySeconds: 5
          periodSeconds: 10
          failureThreshold: 3
        livenessProbe:
          httpGet:
            path: /engines/status
            port: 12434
          initialDelaySeconds: 15
          periodSeconds: 20
          failureThreshold: 3
      volumes:
      # Generic ephemeral volume: model cache lives and dies with the pod.
      - name: model-storage
        ephemeral:
          volumeClaimTemplate:
            spec:
              accessModes: [ "ReadWriteOnce" ]
              resources:
                requests:
                  storage: 100Gi

View File

@ -0,0 +1,64 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when set, otherwise the chart name, truncated to 63
characters to satisfy Kubernetes DNS label limits.
*/}}
{{- define "docker-model-runner.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
.Values.fullnameOverride, when set, wins over both.
*/}}
{{- define "docker-model-runner.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
"+" is replaced with "_" because label values may not contain "+".
*/}}
{{- define "docker-model-runner.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels applied to every chart resource (includes the selector labels).
*/}}
{{- define "docker-model-runner.labels" -}}
helm.sh/chart: {{ include "docker-model-runner.chart" . }}
{{ include "docker-model-runner.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels: the stable subset used for Deployment selectors and Service
selectors; must not change between releases.
*/}}
{{- define "docker-model-runner.selectorLabels" -}}
app.kubernetes.io/name: {{ include "docker-model-runner.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
GPU resource limits: emits a vendor-specific GPU resource line
(nvidia.com/gpu or amd.com/gpu) when .Values.gpu.enabled is true;
emits nothing for any other vendor value or when disabled.
*/}}
{{- define "docker-model-runner.gpuResources" -}}
{{- if .Values.gpu.enabled }}
{{- if eq .Values.gpu.vendor "nvidia" }}
nvidia.com/gpu: {{ .Values.gpu.count }}
{{- else if eq .Values.gpu.vendor "amd" }}
amd.com/gpu: {{ .Values.gpu.count }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,13 @@
{{/*
ConfigMap holding the newline-separated list of models for the model-init
sidecar. Only rendered when model pre-pulling is enabled.
*/}}
{{- if .Values.modelInit.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "docker-model-runner.fullname" . }}-init
  labels:
    {{- include "docker-model-runner.labels" . | nindent 4 }}
data:
  models: |
    {{- range .Values.modelInit.models }}
    {{ . }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,112 @@
{{/*
Deployment for the Docker Model Runner: an init container that opens up
permissions on the model store, the runner itself, an optional model-init
sidecar that pre-pulls models, and an ephemeral volume for model storage.
*/}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "docker-model-runner.fullname" . }}
  labels:
    {{- include "docker-model-runner.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "docker-model-runner.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "docker-model-runner.selectorLabels" . | nindent 8 }}
    spec:
      initContainers:
      # Make the model store writable before the runner starts.
      - name: changeowner
        image: busybox
        command: ["sh", "-c", "chmod a+rwx /models"]
        volumeMounts:
        - name: model-storage
          mountPath: /models
      containers:
      - name: model-runner
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
        imagePullPolicy: {{ .Values.image.pullPolicy }}
        ports:
        - containerPort: {{ .Values.service.targetPort }}
        env:
        # Allowed browser origins for the runner's HTTP API.
        - name: DMR_ORIGINS
          value: "http://localhost:{{ .Values.nodePort.port }},http://localhost:{{ .Values.service.targetPort }}"
        volumeMounts:
        - name: model-storage
          mountPath: /models
        securityContext:
          allowPrivilegeEscalation: {{ .Values.securityContext.allowPrivilegeEscalation }}
        readinessProbe:
          httpGet:
            path: {{ .Values.readinessProbe.httpGet.path }}
            port: {{ .Values.readinessProbe.httpGet.port }}
          initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
          failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
        livenessProbe:
          httpGet:
            path: {{ .Values.livenessProbe.httpGet.path }}
            port: {{ .Values.livenessProbe.httpGet.port }}
          initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
          failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
        {{- if .Values.gpu.enabled }}
        resources:
          limits:
            {{- include "docker-model-runner.gpuResources" . | nindent 12 }}
          requests:
            {{- include "docker-model-runner.gpuResources" . | nindent 12 }}
        {{- end }}
      {{- if .Values.modelInit.enabled }}
      # Sidecar that pre-pulls each model listed in the init ConfigMap via the
      # runner's local API, then idles so the pod stays Ready.
      - name: model-init
        image: curlimages/curl:8.14.1
        command: ["/bin/sh", "-c"]
        args:
        - |
          set -ex
          MODEL_RUNNER=http://localhost:{{ .Values.service.targetPort }}
          echo "Pre-pulling models..."
          while IFS= read -r model; do
            if [ -n "$model" ]; then
              echo "Pulling model: $model"
              curl -d "{\"from\": \"$model\"}" "$MODEL_RUNNER"/models/create
            fi
          done < /config/models
          echo "Model pre-pull complete"
          tail -f /dev/null
        volumeMounts:
        - name: model-storage
          mountPath: /models
        - name: init-config
          mountPath: /config
      {{- end }}
      volumes:
      # Generic ephemeral volume: model cache lives and dies with the pod.
      - name: model-storage
        ephemeral:
          volumeClaimTemplate:
            spec:
              accessModes: [ "ReadWriteOnce" ]
              {{- if .Values.storage.storageClass }}
              storageClassName: {{ .Values.storage.storageClass }}
              {{- end }}
              resources:
                requests:
                  storage: {{ .Values.storage.size }}
      {{- if .Values.modelInit.enabled }}
      - name: init-config
        configMap:
          name: {{ include "docker-model-runner.fullname" . }}-init
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -0,0 +1,34 @@
{{/*
ClusterIP service for in-cluster access, plus an optional NodePort service
for host access (e.g. Docker Desktop) when .Values.nodePort.enabled is true.
*/}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "docker-model-runner.fullname" . }}
  labels:
    {{- include "docker-model-runner.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
  - port: {{ .Values.service.port }}
    targetPort: {{ .Values.service.targetPort }}
    protocol: TCP
    name: http
  selector:
    {{- include "docker-model-runner.selectorLabels" . | nindent 4 }}
---
{{- if .Values.nodePort.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "docker-model-runner.fullname" . }}-nodeport
  labels:
    {{- include "docker-model-runner.labels" . | nindent 4 }}
spec:
  type: NodePort
  ports:
  - port: {{ .Values.service.port }}
    targetPort: {{ .Values.service.targetPort }}
    nodePort: {{ .Values.nodePort.port }}
    protocol: TCP
    name: http
  selector:
    {{- include "docker-model-runner.selectorLabels" . | nindent 4 }}
{{- end }}

View File

@ -0,0 +1,72 @@
# Default values for the docker-model-runner chart.

replicaCount: 1

image:
  repository: docker/model-runner
  pullPolicy: IfNotPresent
  tag: "latest"

nameOverride: ""
fullnameOverride: ""

service:
  type: ClusterIP
  port: 80
  # Port the model-runner container listens on.
  targetPort: 12434

# Optional NodePort service for host access (e.g. Docker Desktop).
nodePort:
  enabled: false
  port: 31245

# GPU resource allocation
gpu:
  enabled: false
  # GPU vendor: nvidia or amd
  vendor: nvidia
  # Number of GPUs to request
  count: 1

# Ephemeral volume configuration
storage:
  # Storage size for ephemeral volume
  size: 100Gi
  # Storage class for ephemeral volume
  storageClass: ""

# Model pre-pull configuration
modelInit:
  enabled: false
  models: []

# Environment variables
# NOTE(review): this key is not referenced by any chart template —
# deployment.yaml builds DMR_ORIGINS from nodePort.port and
# service.targetPort. Default kept consistent with those values
# (was "http://localhost:31246,http://localhost:80", which matched neither).
env:
  DMR_ORIGINS: "http://localhost:31245,http://localhost:12434"

# Security context
securityContext:
  allowPrivilegeEscalation: false

# Probes
readinessProbe:
  httpGet:
    path: /engines/status
    port: 12434
  initialDelaySeconds: 5
  periodSeconds: 10
  failureThreshold: 3

livenessProbe:
  httpGet:
    path: /engines/status
    port: 12434
  initialDelaySeconds: 15
  periodSeconds: 20
  failureThreshold: 3

# Node selector
nodeSelector: {}

# Tolerations
tolerations: []

# Affinity
affinity: {}