implement models preloading and reusing
This commit is contained in:
parent
03b2df17b0
commit
9c5faaf338
|
|
@ -5,7 +5,7 @@ dep_update:
|
||||||
helm dependency update helm-chart/ai-starter-kit
|
helm dependency update helm-chart/ai-starter-kit
|
||||||
|
|
||||||
install:
|
install:
|
||||||
helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --timeout 10m
|
helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m
|
||||||
|
|
||||||
start:
|
start:
|
||||||
minikube start --cpus 4 --memory 8192
|
minikube start --cpus 4 --memory 8192
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
from huggingface_hub import login
|
||||||
|
from pathlib import Path
|
||||||
|
from transformers import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
|
# Location where the Hugging Face token secret is mounted (if provided).
TOKEN_PATH = Path("/etc/secrets/huggingface/token")

# --- Model Download ---
# List your desired Hugging Face model names here
model_names = [
    "Qwen/Qwen3-Embedding-0.6B",
]

# The cache directory is mounted from a PersistentVolumeClaim
save_base_dir = "/tmp/models-cache"

# Empty file written into a model's directory only after BOTH the model and
# its tokenizer were saved, so an interrupted run is never mistaken for a
# complete one.
COMPLETE_MARKER = ".download_complete"


def hf_login(token_path=TOKEN_PATH):
    """Log into the Hugging Face Hub using the token stored at *token_path*.

    Login is best effort: a missing file, an empty token, or a failed login
    is reported on stdout and the script carries on anonymously.

    Args:
        token_path: Path of the file containing the access token.

    Returns:
        True if a login was performed successfully, False otherwise.
    """
    token_path = Path(token_path)
    if not token_path.is_file():
        print("Hugging Face token not found. Proceeding without login.")
        print("Downloads for private or gated models may fail.")
        return False

    print("Hugging Face token file found.")
    try:
        token = token_path.read_text().strip()
        if not token:
            print("Token file is empty. Proceeding without login.")
            return False
        print("Logging into Hugging Face Hub...")
        login(token=token)
        print("Login successful.")
        return True
    except Exception as e:  # best effort: never abort the startup script
        print(f"Failed to read token or login: {e}")
        return False


def is_cached(save_dir):
    """Return True if *save_dir* holds a completely saved model."""
    return (Path(save_dir) / COMPLETE_MARKER).is_file()


def download_model(model_name, base_dir=save_base_dir):
    """Download *model_name* and its tokenizer into ``base_dir/model_name``.

    If the target directory already carries the completion marker, nothing is
    downloaded and the existing copy is reused.

    Args:
        model_name: Hugging Face model id, e.g. ``"org/name"``.
        base_dir: Root directory of the persistent model cache.

    Returns:
        True if the model was downloaded, False if it was already cached.
    """
    save_dir = Path(base_dir) / model_name
    if is_cached(save_dir):
        print(f"{model_name} already cached in {save_dir}; skipping download.")
        return False

    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)
    # Written last: its presence means everything above succeeded.
    (save_dir / COMPLETE_MARKER).touch()
    print(f"Successfully cached {model_name} in {base_dir}")
    return True


def main():
    """Log in (if a token is available) and cache every configured model."""
    hf_login()
    for model_name in model_names:
        print(f"--- Downloading {model_name} ---")
        try:
            download_model(model_name)
        except Exception as e:
            # Best effort: one failing model must not prevent the others
            # from being cached.
            print(f"Failed to download {model_name}. Error: {e}")
    print("--- Model download process finished. ---")


if __name__ == "__main__":
    main()
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Packages installed into the notebook pod before models are downloaded.
# NOTE: "pathlib" is intentionally not listed: it is part of the Python 3
# standard library, and the PyPI package of that name is an obsolete backport.
transformers
torch
huggingface_hub
|
||||||
|
|
@ -0,0 +1,29 @@
|
||||||
|
---
|
||||||
|
{{- /*
|
||||||
|
Create ConfigMaps for jupyterhub singleuser pods.
|
||||||
|
These ConfigMaps are mounted as volumes.
|
||||||
|
*/ -}}
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-requirements-txt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||||
|
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
|
||||||
|
data:
|
||||||
|
requirements.txt: |-
|
||||||
|
{{ .Files.Get "files/requirements.txt" | nindent 4 }}
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-hf-download-script
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||||
|
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
|
||||||
|
data:
|
||||||
|
download_models.py: |-
|
||||||
|
{{ .Files.Get "files/download_models.py" | nindent 4 }}
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
{{- if .Values.huggingface.token }}
# Secret carrying the Hugging Face access token. It is rendered only when
# .Values.huggingface.token is non-empty.
apiVersion: v1
kind: Secret
metadata:
  name: {{ .Release.Name }}-hf-token-secret
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
type: Opaque
stringData:
  # Quoted so that tokens containing YAML metacharacters, or values that
  # would otherwise parse as a number/boolean, are always emitted as a string.
  token: {{ .Values.huggingface.token | quote }}
{{- end }}
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
{{- if .Values.modelsCachePvc.enabled -}}
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-models-cache-pvc
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||||
|
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
{{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }}
|
||||||
|
{{- /*
|
||||||
|
If storageClassName is set to a specific class, it will be used.
|
||||||
|
If storageClassName is set to an empty string (""), no storage class will be used for provisioning.
|
||||||
|
If storageClassName is null or omitted, the default storage class will be used.
|
||||||
|
*/}}
|
||||||
|
{{- if or .Values.modelsCachePvc.storageClassName (eq .Values.modelsCachePvc.storageClassName "") }}
|
||||||
|
storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }}
|
||||||
|
{{- end }}
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.modelsCachePvc.size }}
|
||||||
|
{{- end -}}
|
||||||
|
|
@ -7,29 +7,53 @@ jupyterhub:
|
||||||
password: "changeme"
|
password: "changeme"
|
||||||
|
|
||||||
singleuser:
|
singleuser:
|
||||||
|
fsGid: 100
|
||||||
lifecycleHooks:
|
lifecycleHooks:
|
||||||
postStart:
|
postStart:
|
||||||
exec:
|
exec:
|
||||||
command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
|
command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt && python /tmp/download_models.py"]
|
||||||
extraVolumes:
|
extraVolumes:
|
||||||
- name: requirements-txt
|
- name: requirements-txt
|
||||||
configMap:
|
configMap:
|
||||||
name: requirements-txt
|
name: "{{ .Release.Name }}-requirements-txt"
|
||||||
|
- name: models-cache
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: "{{ .Release.Name }}-models-cache-pvc"
|
||||||
|
- name: hf-download-script
|
||||||
|
configMap:
|
||||||
|
name: "{{ .Release.Name }}-hf-download-script"
|
||||||
|
- name: hf-token-secret
|
||||||
|
secret:
|
||||||
|
secretName: "{{ .Release.Name }}-hf-token-secret"
|
||||||
|
optional: true
|
||||||
extraVolumeMounts:
|
extraVolumeMounts:
|
||||||
- name: requirements-txt
|
- name: requirements-txt
|
||||||
mountPath: /tmp/requirements.txt
|
mountPath: /tmp/requirements.txt
|
||||||
subPath: requirements.txt
|
subPath: requirements.txt
|
||||||
|
- name: models-cache
|
||||||
|
mountPath: /tmp/models-cache
|
||||||
|
- name: hf-download-script
|
||||||
|
mountPath: /tmp/download_models.py
|
||||||
|
subPath: download_models.py
|
||||||
|
- name: hf-token-secret
|
||||||
|
mountPath: "/etc/secrets/huggingface"
|
||||||
|
readOnly: true
|
||||||
extraEnvVars:
|
extraEnvVars:
|
||||||
RAY_ADDRESS: "{{ tpl .Release.Name $ }}-kuberay-head-svc:6379"
|
RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
|
||||||
MLFLOW_TRACKING_URI: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
|
MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
|
||||||
# JUPYTERLAB_DIR: "/opt"
|
# JUPYTERLAB_DIR: "/opt"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 16Gi
|
||||||
|
requests:
|
||||||
|
memory: 4Gi
|
||||||
hub:
|
hub:
|
||||||
password: "sneakypass"
|
password: "sneakypass"
|
||||||
extraEnvVars:
|
extraEnvVars:
|
||||||
- name: "RAY_ADDRESS"
|
- name: "RAY_ADDRESS"
|
||||||
value: "{{ tpl .Release.Name $ }}-kuberay-head-svc"
|
value: "{{ .Release.Name }}-kuberay-head-svc"
|
||||||
- name: "MLFLOW_TRACKING_URI"
|
- name: "MLFLOW_TRACKING_URI"
|
||||||
value: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
|
value: "http://{{ .Release.Name }}-mlflow-tracking"
|
||||||
|
|
||||||
ray-cluster:
|
ray-cluster:
|
||||||
head:
|
head:
|
||||||
|
|
@ -49,3 +73,19 @@ mlflow:
|
||||||
auth:
|
auth:
|
||||||
password: "changemeibegyou"
|
password: "changemeibegyou"
|
||||||
flaskServerSecretKey: "noneedtochangethisone"
|
flaskServerSecretKey: "noneedtochangethisone"
|
||||||
|
|
||||||
|
huggingface:
|
||||||
|
# Provide your Hugging Face token here to download gated or private models.
|
||||||
|
# It is recommended to set this via --set or a separate values file, e.g.,
|
||||||
|
# --set huggingface.token=hf_...
|
||||||
|
token: ""
|
||||||
|
|
||||||
|
modelsCachePvc:
|
||||||
|
enabled: true
|
||||||
|
# To use the default StorageClass, set storageClassName to null or omit it.
|
||||||
|
# To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name.
|
||||||
|
# To create a PVC that doesn't request any StorageClass, set it to an empty string ("").
|
||||||
|
storageClassName: null
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
size: 10Gi
|
||||||
|
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
ray
|
|
||||||
mlflow
|
|
||||||
Loading…
Reference in New Issue