Implement model preloading and reuse

This commit is contained in:
Nikita Aleksandrov 2025-07-21 14:15:37 +02:00
parent 03b2df17b0
commit 9c5faaf338
8 changed files with 161 additions and 9 deletions

View File

@ -5,7 +5,7 @@ dep_update:
helm dependency update helm-chart/ai-starter-kit helm dependency update helm-chart/ai-starter-kit
install: install:
helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --timeout 10m helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m
start: start:
minikube start --cpus 4 --memory 8192 minikube start --cpus 4 --memory 8192

View File

@ -0,0 +1,44 @@
from huggingface_hub import login
from pathlib import Path
from transformers import AutoModel, AutoTokenizer
# Pre-download Hugging Face models into a shared cache directory.
#
# The script is best effort: a failed login or a failed download is reported
# on stdout and the script carries on, so it never raises to its caller.

# File that holds the Hugging Face access token, if one was provided.
TOKEN_PATH = Path("/etc/secrets/huggingface/token")

# --- Authentication ---
# Logging in is optional; without a token only public models can be fetched.
if TOKEN_PATH.is_file():
    print("Hugging Face token file found.")
    try:
        token = TOKEN_PATH.read_text().strip()
        if token:
            print("Logging into Hugging Face Hub...")
            login(token=token)
            print("Login successful.")
        else:
            print("Token file is empty. Proceeding without login.")
    except Exception as e:
        # Deliberately broad: a bad token must not prevent public downloads.
        print(f"Failed to read token or login: {e}")
else:
    print("Hugging Face token not found. Proceeding without login.")
    print("Downloads for private or gated models may fail.")

# --- Model Download ---
# List your desired Hugging Face model names here
model_names = [
    "Qwen/Qwen3-Embedding-0.6B",
]

# The cache directory is mounted from a PersistentVolumeClaim
save_base_dir = "/tmp/models-cache"

# Files used as evidence that a previous run finished saving a model.
# NOTE(review): assumes save_pretrained() writes config.json for the model and
# tokenizer_config.json for the tokenizer -- confirm for custom model types.
cache_markers = ("config.json", "tokenizer_config.json")

for model_name in model_names:
    save_dir = f"{save_base_dir}/{model_name}"

    # Reuse what an earlier run already stored instead of loading and
    # re-saving the whole model every time the script runs. The tokenizer is
    # saved last, so its marker implies the model save completed as well.
    if all((Path(save_dir) / marker).is_file() for marker in cache_markers):
        print(f"--- {model_name} already cached in {save_dir}, skipping ---")
        continue

    print(f"--- Downloading {model_name} ---")
    try:
        model = AutoModel.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model.save_pretrained(save_dir)
        tokenizer.save_pretrained(save_dir)
        print(f"Successfully cached {model_name} in {save_base_dir}")
    except Exception as e:
        # Keep going so one broken model does not block the remaining ones.
        print(f"Failed to download {model_name}. Error: {e}")

print("--- Model download process finished. ---")

View File

@ -0,0 +1,4 @@
transformers
torch
huggingface_hub
# pathlib is intentionally not listed: it is part of the Python 3 standard
# library, and the PyPI package of that name is an obsolete backport that can
# shadow the built-in module.

View File

@ -0,0 +1,29 @@
---
{{- /*
Create ConfigMaps for jupyterhub singleuser pods.
These ConfigMaps are mounted as volumes.
*/ -}}
---
# ConfigMap carrying the chart's files/requirements.txt under the key
# "requirements.txt".
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Release.Name }}-requirements-txt
labels:
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
data:
requirements.txt: |-
{{ .Files.Get "files/requirements.txt" | nindent 4 }}
---
# ConfigMap carrying the chart's files/download_models.py under the key
# "download_models.py".
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Release.Name }}-hf-download-script
labels:
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
data:
download_models.py: |-
{{ .Files.Get "files/download_models.py" | nindent 4 }}

View File

@ -0,0 +1,13 @@
{{- if .Values.huggingface.token }}
# Secret holding the Hugging Face access token under the key "token".
# Rendered only when huggingface.token is set to a non-empty value.
apiVersion: v1
kind: Secret
metadata:
  name: {{ .Release.Name }}-hf-token-secret
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
type: Opaque
stringData:
  # Quoted so the value is always emitted as a YAML string, whatever
  # characters the token contains.
  token: {{ .Values.huggingface.token | quote }}
{{- end }}

View File

@ -0,0 +1,24 @@
{{- if .Values.modelsCachePvc.enabled -}}
# PersistentVolumeClaim for the models cache.
# Rendered only when modelsCachePvc.enabled is true.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Release.Name }}-models-cache-pvc
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
spec:
  accessModes:
    {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }}
  {{- /*
  If storageClassName is set to a specific class, it will be used.
  If storageClassName is set to an empty string (""), no storage class will be used for provisioning.
  If storageClassName is null or omitted, the default storage class will be used.
  The check below tests for "not null" directly, so a null value is never
  compared against a string.
  */}}
  {{- if not (kindIs "invalid" .Values.modelsCachePvc.storageClassName) }}
  storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.modelsCachePvc.size }}
{{- end -}}

View File

@ -7,29 +7,53 @@ jupyterhub:
password: "changeme" password: "changeme"
singleuser: singleuser:
fsGid: 100
lifecycleHooks: lifecycleHooks:
postStart: postStart:
exec: exec:
command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"] command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt && python /tmp/download_models.py"]
extraVolumes: extraVolumes:
- name: requirements-txt - name: requirements-txt
configMap: configMap:
name: requirements-txt name: "{{ .Release.Name }}-requirements-txt"
- name: models-cache
persistentVolumeClaim:
claimName: "{{ .Release.Name }}-models-cache-pvc"
- name: hf-download-script
configMap:
name: "{{ .Release.Name }}-hf-download-script"
- name: hf-token-secret
secret:
secretName: "{{ .Release.Name }}-hf-token-secret"
optional: true
extraVolumeMounts: extraVolumeMounts:
- name: requirements-txt - name: requirements-txt
mountPath: /tmp/requirements.txt mountPath: /tmp/requirements.txt
subPath: requirements.txt subPath: requirements.txt
- name: models-cache
mountPath: /tmp/models-cache
- name: hf-download-script
mountPath: /tmp/download_models.py
subPath: download_models.py
- name: hf-token-secret
mountPath: "/etc/secrets/huggingface"
readOnly: true
extraEnvVars: extraEnvVars:
RAY_ADDRESS: "{{ tpl .Release.Name $ }}-kuberay-head-svc:6379" RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
MLFLOW_TRACKING_URI: "http://{{ tpl .Release.Name $ }}-mlflow-tracking" MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
# JUPYTERLAB_DIR: "/opt" # JUPYTERLAB_DIR: "/opt"
resources:
limits:
memory: 16Gi
requests:
memory: 4Gi
hub: hub:
password: "sneakypass" password: "sneakypass"
extraEnvVars: extraEnvVars:
- name: "RAY_ADDRESS" - name: "RAY_ADDRESS"
value: "{{ tpl .Release.Name $ }}-kuberay-head-svc" value: "{{ .Release.Name }}-kuberay-head-svc"
- name: "MLFLOW_TRACKING_URI" - name: "MLFLOW_TRACKING_URI"
value: "http://{{ tpl .Release.Name $ }}-mlflow-tracking" value: "http://{{ .Release.Name }}-mlflow-tracking"
ray-cluster: ray-cluster:
head: head:
@ -49,3 +73,19 @@ mlflow:
auth: auth:
password: "changemeibegyou" password: "changemeibegyou"
flaskServerSecretKey: "noneedtochangethisone" flaskServerSecretKey: "noneedtochangethisone"
huggingface:
# Provide your Hugging Face token here to download gated or private models.
# It is recommended to set this via --set or a separate values file, e.g.,
# --set huggingface.token=hf_...
token: ""
modelsCachePvc:
enabled: true
# To use the default StorageClass, set storageClassName to null or omit it.
# To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name.
# To create a PVC that doesn't request any StorageClass, set it to an empty string ("").
storageClassName: null
accessModes:
- ReadWriteOnce
size: 10Gi

View File

@ -1,2 +0,0 @@
ray
mlflow