Merge pull request #2 from volatilemolotov/models-preload

implement models preloading and reusing
This commit is contained in:
Vlado Djerek 2025-07-21 14:39:59 +02:00 committed by GitHub
commit 6a5c72becc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 161 additions and 9 deletions

View File

@ -5,7 +5,7 @@ dep_update:
helm dependency update helm-chart/ai-starter-kit
install:
helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --timeout 10m
# The Hugging Face token comes from HF_TOKEN (environment or `make install HF_TOKEN=hf_...`).
# When HF_TOKEN is empty the chart's token Secret is simply not rendered.
helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set-string huggingface.token="$(HF_TOKEN)" --timeout 10m
start:
minikube start --cpus 4 --memory 8192

View File

@ -0,0 +1,44 @@
"""Pre-download Hugging Face models into the shared models cache.

Optionally logs into the Hugging Face Hub with a token read from TOKEN_PATH,
then saves every model listed in ``model_names`` (together with its tokenizer)
under ``save_base_dir``. Models whose files are already present are skipped, so
repeated runs reuse the cache instead of downloading again. Login and download
failures are printed, not raised.
"""
from huggingface_hub import login
from pathlib import Path
from transformers import AutoModel, AutoTokenizer

TOKEN_PATH = Path("/etc/secrets/huggingface/token")

# --- Model Download ---
# List your desired Hugging Face model names here
model_names = [
    "Qwen/Qwen3-Embedding-0.6B",
]

# The cache directory is mounted from a PersistentVolumeClaim
save_base_dir = "/tmp/models-cache"

# Files written by model.save_pretrained() and tokenizer.save_pretrained().
# The tokenizer is saved last, so its config doubles as a "run completed" marker.
_CACHE_MARKERS = ("config.json", "tokenizer_config.json")


def _login_if_token_present():
    """Log into the Hugging Face Hub when a non-empty token file exists."""
    if not TOKEN_PATH.is_file():
        print("Hugging Face token not found. Proceeding without login.")
        print("Downloads for private or gated models may fail.")
        return

    print("Hugging Face token file found.")
    try:
        token = TOKEN_PATH.read_text().strip()
        if token:
            print("Logging into Hugging Face Hub...")
            login(token=token)
            print("Login successful.")
        else:
            print("Token file is empty. Proceeding without login.")
    except Exception as e:
        # Best effort: public models can still be downloaded anonymously.
        print(f"Failed to read token or login: {e}")


def _is_cached(save_dir):
    """Return True when both the model and its tokenizer were already saved."""
    return all((save_dir / marker).is_file() for marker in _CACHE_MARKERS)


def _cache_model(model_name):
    """Download one model and its tokenizer and save them under the cache."""
    save_dir = Path(save_base_dir) / model_name
    if _is_cached(save_dir):
        print(f"{model_name} is already cached in {save_dir}. Skipping download.")
        return

    print(f"--- Downloading {model_name} ---")
    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)
    print(f"Successfully cached {model_name} in {save_dir}")


def main():
    """Log in if possible, then cache every configured model."""
    _login_if_token_present()
    for model_name in model_names:
        try:
            _cache_model(model_name)
        except Exception as e:
            # Keep going so one broken model does not block the others.
            print(f"Failed to download {model_name}. Error: {e}")
    print("--- Model download process finished. ---")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,4 @@
# pathlib is part of the Python 3 standard library; the PyPI "pathlib" backport is intentionally not listed.
transformers
torch
huggingface_hub

View File

@ -0,0 +1,29 @@
{{- /*
ConfigMaps for the JupyterHub single-user pods.
These ConfigMaps are mounted as volumes.

The file contents are embedded with a left-trimmed `nindent`, so each block
scalar starts directly with the first line of the file instead of a blank line.
*/ -}}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-requirements-txt
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
data:
  requirements.txt: |-
    {{- .Files.Get "files/requirements.txt" | nindent 4 }}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-hf-download-script
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
data:
  download_models.py: |-
    {{- .Files.Get "files/download_models.py" | nindent 4 }}

View File

@ -0,0 +1,13 @@
{{- /*
Secret holding the Hugging Face token.
Rendered only when .Values.huggingface.token is non-empty.
*/ -}}
{{- if .Values.huggingface.token }}
apiVersion: v1
kind: Secret
metadata:
  name: {{ .Release.Name }}-hf-token-secret
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
type: Opaque
stringData:
  {{- /* Quoted so the token is always rendered as a YAML string. */}}
  token: {{ .Values.huggingface.token | quote }}
{{- end }}

View File

@ -0,0 +1,24 @@
{{- if .Values.modelsCachePvc.enabled -}}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Release.Name }}-models-cache-pvc
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
spec:
  accessModes:
    {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }}
  {{- /*
  If storageClassName is set to a specific class, it will be used.
  If storageClassName is set to an empty string (""), no storage class will be used for provisioning.
  If storageClassName is null or omitted, the default storage class will be used.

  The nil check uses kindIs "invalid" rather than comparing the value with eq,
  so a null value is never passed to a string comparison.
  */}}
  {{- if not (kindIs "invalid" .Values.modelsCachePvc.storageClassName) }}
  storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.modelsCachePvc.size | quote }}
{{- end -}}

View File

@ -7,29 +7,53 @@ jupyterhub:
password: "changeme"
singleuser:
fsGid: 100
lifecycleHooks:
postStart:
exec:
command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt && python /tmp/download_models.py"]
extraVolumes:
- name: requirements-txt
configMap:
name: requirements-txt
name: "{{ .Release.Name }}-requirements-txt"
- name: models-cache
persistentVolumeClaim:
claimName: "{{ .Release.Name }}-models-cache-pvc"
- name: hf-download-script
configMap:
name: "{{ .Release.Name }}-hf-download-script"
- name: hf-token-secret
secret:
secretName: "{{ .Release.Name }}-hf-token-secret"
optional: true
extraVolumeMounts:
- name: requirements-txt
mountPath: /tmp/requirements.txt
subPath: requirements.txt
- name: models-cache
mountPath: /tmp/models-cache
- name: hf-download-script
mountPath: /tmp/download_models.py
subPath: download_models.py
- name: hf-token-secret
mountPath: "/etc/secrets/huggingface"
readOnly: true
extraEnvVars:
RAY_ADDRESS: "{{ tpl .Release.Name $ }}-kuberay-head-svc:6379"
MLFLOW_TRACKING_URI: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
# JUPYTERLAB_DIR: "/opt"
resources:
limits:
memory: 16Gi
requests:
memory: 4Gi
hub:
password: "sneakypass"
extraEnvVars:
- name: "RAY_ADDRESS"
value: "{{ tpl .Release.Name $ }}-kuberay-head-svc"
value: "{{ .Release.Name }}-kuberay-head-svc"
- name: "MLFLOW_TRACKING_URI"
value: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
value: "http://{{ .Release.Name }}-mlflow-tracking"
ray-cluster:
head:
@ -49,3 +73,19 @@ mlflow:
auth:
password: "changemeibegyou"
flaskServerSecretKey: "noneedtochangethisone"
# Hugging Face Hub access.
huggingface:
  # Token for downloading gated or private models. Prefer passing it at
  # install time (--set huggingface.token=hf_... or a separate values file)
  # over writing it here.
  token: ""

# PersistentVolumeClaim backing the models cache.
modelsCachePvc:
  enabled: true
  size: 10Gi
  accessModes:
    - ReadWriteOnce
  # storageClassName selects how the claim is provisioned:
  #   null or omitted -> the default StorageClass
  #   "<name>"        -> that StorageClass (e.g. "standard-rwo" on GKE)
  #   ""              -> a PVC that does not request any StorageClass
  storageClassName: null

View File

@ -1,2 +0,0 @@
ray
mlflow