Merge pull request #2 from volatilemolotov/models-preload
implement models preloading and reusing
This commit is contained in:
commit
6a5c72becc
|
|
@ -5,7 +5,7 @@ dep_update:
|
|||
helm dependency update helm-chart/ai-starter-kit
|
||||
|
||||
install:
|
||||
helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --timeout 10m
|
||||
helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m
|
||||
|
||||
start:
|
||||
minikube start --cpus 4 --memory 8192
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
from huggingface_hub import login
|
||||
from pathlib import Path
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
|
||||
# --- Hugging Face Login ---
# Authenticate against the Hugging Face Hub only when a non-empty token file
# is present. Every failure is reported and swallowed so the rest of the
# script still runs without credentials.
TOKEN_PATH = Path("/etc/secrets/huggingface/token")

if not TOKEN_PATH.is_file():
    print("Hugging Face token not found. Proceeding without login.")
    print("Downloads for private or gated models may fail.")
else:
    print("Hugging Face token file found.")
    try:
        token = TOKEN_PATH.read_text().strip()
        if not token:
            print("Token file is empty. Proceeding without login.")
        else:
            print("Logging into Hugging Face Hub...")
            login(token=token)
            print("Login successful.")
    except Exception as e:
        # Best-effort: a broken token must not abort the model download.
        print(f"Failed to read token or login: {e}")
|
||||
|
||||
|
||||
# --- Model Download ---
# List your desired Hugging Face model names here
model_names = [
    "Qwen/Qwen3-Embedding-0.6B",
]

# The cache directory is mounted from a PersistentVolumeClaim
save_base_dir = "/tmp/models-cache"

# Files whose presence marks a finished save. The tokenizer is written after
# the model, so its config only exists once both saves have completed.
_CACHE_MARKERS = ("config.json", "tokenizer_config.json")

for model_name in model_names:
    save_dir = f"{save_base_dir}/{model_name}"

    # Reuse what an earlier run already stored in the cache directory instead
    # of downloading and re-saving the model every time the script runs.
    if all(Path(save_dir, marker).is_file() for marker in _CACHE_MARKERS):
        print(f"--- {model_name} already cached in {save_dir}, skipping download ---")
        continue

    print(f"--- Downloading {model_name} ---")
    try:
        model = AutoModel.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model.save_pretrained(save_dir)
        tokenizer.save_pretrained(save_dir)
        print(f"Successfully cached {model_name} in {save_base_dir}")
    except Exception as e:
        # Best-effort: report the failure and continue with the next model.
        print(f"Failed to download {model_name}. Error: {e}")

print("--- Model download process finished. ---")
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
# pathlib is part of the Python 3 standard library; the PyPI package of the
# same name is an obsolete backport and is intentionally not listed here.
transformers
torch
huggingface_hub
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
{{- /*
Create ConfigMaps for jupyterhub singleuser pods.
These ConfigMaps are mounted as volumes.

This comment trims whitespace on both sides, so it must not sit between two
`---` separators (they would be joined into `------`). The file contents are
inserted with `{{- ... | nindent 4 }}` so the block scalar does not start
with an empty line.
*/ -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-requirements-txt
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
data:
  requirements.txt: |-
    {{- .Files.Get "files/requirements.txt" | nindent 4 }}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-hf-download-script
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
data:
  download_models.py: |-
    {{- .Files.Get "files/download_models.py" | nindent 4 }}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
{{- /*
Secret holding the Hugging Face token. Rendered only when
.Values.huggingface.token is non-empty.
*/}}
{{- if .Values.huggingface.token }}
apiVersion: v1
kind: Secret
metadata:
  name: {{ .Release.Name }}-hf-token-secret
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
type: Opaque
stringData:
  # Quoted so the value is always emitted as a YAML string, whatever
  # characters it contains.
  token: {{ .Values.huggingface.token | quote }}
{{- end }}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
{{- if .Values.modelsCachePvc.enabled -}}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Release.Name }}-models-cache-pvc
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
spec:
  accessModes:
    {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }}
  {{- /*
  If storageClassName is set to a specific class, it will be used.
  If storageClassName is set to an empty string (""), no storage class will be used for provisioning.
  If storageClassName is null or omitted, the default storage class will be used.

  kindIs "string" is true for both a class name and "", and false for
  null/omitted, without comparing a nil value against a string.
  */}}
  {{- if kindIs "string" .Values.modelsCachePvc.storageClassName }}
  storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.modelsCachePvc.size }}
{{- end -}}
|
||||
|
|
@ -7,29 +7,53 @@ jupyterhub:
|
|||
password: "changeme"
|
||||
|
||||
singleuser:
|
||||
fsGid: 100
|
||||
lifecycleHooks:
|
||||
postStart:
|
||||
exec:
|
||||
command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
|
||||
command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt && python /tmp/download_models.py"]
|
||||
extraVolumes:
|
||||
- name: requirements-txt
|
||||
configMap:
|
||||
name: requirements-txt
|
||||
name: "{{ .Release.Name }}-requirements-txt"
|
||||
- name: models-cache
|
||||
persistentVolumeClaim:
|
||||
claimName: "{{ .Release.Name }}-models-cache-pvc"
|
||||
- name: hf-download-script
|
||||
configMap:
|
||||
name: "{{ .Release.Name }}-hf-download-script"
|
||||
- name: hf-token-secret
|
||||
secret:
|
||||
secretName: "{{ .Release.Name }}-hf-token-secret"
|
||||
optional: true
|
||||
extraVolumeMounts:
|
||||
- name: requirements-txt
|
||||
mountPath: /tmp/requirements.txt
|
||||
subPath: requirements.txt
|
||||
- name: models-cache
|
||||
mountPath: /tmp/models-cache
|
||||
- name: hf-download-script
|
||||
mountPath: /tmp/download_models.py
|
||||
subPath: download_models.py
|
||||
- name: hf-token-secret
|
||||
mountPath: "/etc/secrets/huggingface"
|
||||
readOnly: true
|
||||
extraEnvVars:
|
||||
RAY_ADDRESS: "{{ tpl .Release.Name $ }}-kuberay-head-svc:6379"
|
||||
MLFLOW_TRACKING_URI: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
|
||||
RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
|
||||
MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
|
||||
# JUPYTERLAB_DIR: "/opt"
|
||||
resources:
|
||||
limits:
|
||||
memory: 16Gi
|
||||
requests:
|
||||
memory: 4Gi
|
||||
hub:
|
||||
password: "sneakypass"
|
||||
extraEnvVars:
|
||||
- name: "RAY_ADDRESS"
|
||||
value: "{{ tpl .Release.Name $ }}-kuberay-head-svc"
|
||||
value: "{{ .Release.Name }}-kuberay-head-svc"
|
||||
- name: "MLFLOW_TRACKING_URI"
|
||||
value: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
|
||||
value: "http://{{ .Release.Name }}-mlflow-tracking"
|
||||
|
||||
ray-cluster:
|
||||
head:
|
||||
|
|
@ -49,3 +73,19 @@ mlflow:
|
|||
auth:
|
||||
password: "changemeibegyou"
|
||||
flaskServerSecretKey: "noneedtochangethisone"
|
||||
|
||||
huggingface:
|
||||
# Provide your Hugging Face token here to download gated or private models.
|
||||
# It is recommended to set this via --set or a separate values file, e.g.,
|
||||
# --set huggingface.token=hf_...
|
||||
token: ""
|
||||
|
||||
modelsCachePvc:
|
||||
enabled: true
|
||||
# To use the default StorageClass, set storageClassName to null or omit it.
|
||||
# To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name.
|
||||
# To create a PVC that doesn't request any StorageClass, set it to an empty string ("").
|
||||
storageClassName: null
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
size: 10Gi
|
||||
|
|
|
|||
|
|
@ -1,2 +0,0 @@
|
|||
ray
|
||||
mlflow
|
||||
Loading…
Reference in New Issue