From 9c5faaf3383fbbc0fc9a52ef62bafb356b06855f Mon Sep 17 00:00:00 2001
From: Nikita Aleksandrov
Date: Mon, 21 Jul 2025 14:15:37 +0200
Subject: [PATCH] implement models preloading and reusing

---
 ai/ai-starter-kit/Makefile                         |  2 +-
 .../ai-starter-kit/files/download_models.py        | 44 ++++++++++++++++
 .../ai-starter-kit/files/requirements.txt          |  4 ++
 .../ai-starter-kit/templates/configmaps.yaml       | 29 +++++++++++
 .../ai-starter-kit/templates/hf-secret.yaml        | 13 +++++
 .../ai-starter-kit/templates/pvc.yaml              | 24 +++++++++
 .../helm-chart/ai-starter-kit/values.yaml          | 52 ++++++++++++++++---
 ai/ai-starter-kit/requirements.txt                 |  2 -
 8 files changed, 161 insertions(+), 9 deletions(-)
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
 delete mode 100644 ai/ai-starter-kit/requirements.txt

diff --git a/ai/ai-starter-kit/Makefile b/ai/ai-starter-kit/Makefile
index 00f3e860..4306b3fa 100644
--- a/ai/ai-starter-kit/Makefile
+++ b/ai/ai-starter-kit/Makefile
@@ -5,7 +5,7 @@ dep_update:
 	helm dependency update helm-chart/ai-starter-kit
 
 install:
-	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --timeout 10m
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m
 
 start:
 	minikube start --cpus 4 --memory 8192
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
new file mode 100644
index 00000000..2766f3f1
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
@@ -0,0 +1,44 @@
+from huggingface_hub import login
+from pathlib import Path
+from transformers import AutoModel, AutoTokenizer
+
+TOKEN_PATH = Path("/etc/secrets/huggingface/token")
+if TOKEN_PATH.is_file():
+    print("Hugging Face token file found.")
+    try:
+        token = TOKEN_PATH.read_text().strip()
+        if token:
+            print("Logging into Hugging Face Hub...")
+            login(token=token)
+            print("Login successful.")
+        else:
+            print("Token file is empty. Proceeding without login.")
+    except Exception as e:
+        print(f"Failed to read token or login: {e}")
+else:
+    print("Hugging Face token not found. Proceeding without login.")
+    print("Downloads for private or gated models may fail.")
+
+
+# --- Model Download ---
+# List your desired Hugging Face model names here
+model_names = [
+    "Qwen/Qwen3-Embedding-0.6B",
+]
+
+# The cache directory is mounted from a PersistentVolumeClaim
+save_base_dir = "/tmp/models-cache"
+
+for model_name in model_names:
+    print(f"--- Downloading {model_name} ---")
+    try:
+        model = AutoModel.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        save_dir = f"{save_base_dir}/{model_name}"
+        model.save_pretrained(save_dir)
+        tokenizer.save_pretrained(save_dir)
+        print(f"Successfully cached {model_name} in {save_base_dir}")
+    except Exception as e:
+        print(f"Failed to download {model_name}. Error: {e}")
+
+print("--- Model download process finished. ---")
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
new file mode 100644
index 00000000..0ecae476
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
@@ -0,0 +1,4 @@
+transformers
+torch
+huggingface_hub
+pathlib
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
new file mode 100644
index 00000000..23ab3040
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
@@ -0,0 +1,29 @@
+---
+{{- /*
+Create ConfigMaps for jupyterhub singleuser pods.
+These ConfigMaps are mounted as volumes.
+*/ -}}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-requirements-txt
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:
+  requirements.txt: |-
+{{ .Files.Get "files/requirements.txt" | nindent 4 }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-hf-download-script
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:
+  download_models.py: |-
+{{ .Files.Get "files/download_models.py" | nindent 4 }}
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
new file mode 100644
index 00000000..308b0a94
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.huggingface.token }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ .Release.Name }}-hf-token-secret
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+type: Opaque
+stringData:
+  token: {{ .Values.huggingface.token }}
+{{- end }}
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
new file mode 100644
index 00000000..8aa70eaf
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
@@ -0,0 +1,24 @@
+{{- if .Values.modelsCachePvc.enabled -}}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ .Release.Name }}-models-cache-pvc
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+spec:
+  accessModes:
+    {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }}
+  {{- /*
+  If storageClassName is set to a specific class, it will be used.
+  If storageClassName is set to an empty string (""), no storage class will be used for provisioning.
+  If storageClassName is null or omitted, the default storage class will be used.
+  */}}
+  {{- if or .Values.modelsCachePvc.storageClassName (eq .Values.modelsCachePvc.storageClassName "") }}
+  storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }}
+  {{- end }}
+  resources:
+    requests:
+      storage: {{ .Values.modelsCachePvc.size }}
+{{- end -}}
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index 0ab044a1..66820724 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -7,29 +7,53 @@ jupyterhub:
     password: "changeme"
 
   singleuser:
+    fsGid: 100
     lifecycleHooks:
       postStart:
         exec:
-          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
+          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt && python /tmp/download_models.py"]
     extraVolumes:
       - name: requirements-txt
         configMap:
-          name: requirements-txt
+          name: "{{ .Release.Name }}-requirements-txt"
+      - name: models-cache
+        persistentVolumeClaim:
+          claimName: "{{ .Release.Name }}-models-cache-pvc"
+      - name: hf-download-script
+        configMap:
+          name: "{{ .Release.Name }}-hf-download-script"
+      - name: hf-token-secret
+        secret:
+          secretName: "{{ .Release.Name }}-hf-token-secret"
+          optional: true
     extraVolumeMounts:
       - name: requirements-txt
         mountPath: /tmp/requirements.txt
         subPath: requirements.txt
+      - name: models-cache
+        mountPath: /tmp/models-cache
+      - name: hf-download-script
+        mountPath: /tmp/download_models.py
+        subPath: download_models.py
+      - name: hf-token-secret
+        mountPath: "/etc/secrets/huggingface"
+        readOnly: true
     extraEnvVars:
-      RAY_ADDRESS: "{{ tpl .Release.Name $ }}-kuberay-head-svc:6379"
-      MLFLOW_TRACKING_URI: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
+      RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
+      MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
       # JUPYTERLAB_DIR: "/opt"
+    resources:
+      limits:
+        memory: 16Gi
+      requests:
+        memory: 4Gi
   hub:
     password: "sneakypass"
     extraEnvVars:
       - name: "RAY_ADDRESS"
-        value: "{{ tpl .Release.Name $ }}-kuberay-head-svc"
+        value: "{{ .Release.Name }}-kuberay-head-svc"
       - name: "MLFLOW_TRACKING_URI"
-        value: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
+        value: "http://{{ .Release.Name }}-mlflow-tracking"
 
 ray-cluster:
   head:
@@ -49,3 +73,19 @@ mlflow:
   auth:
     password: "changemeibegyou"
     flaskServerSecretKey: "noneedtochangethisone"
+
+huggingface:
+  # Provide your Hugging Face token here to download gated or private models.
+  # It is recommended to set this via --set or a separate values file, e.g.,
+  # --set huggingface.token=hf_...
+  token: ""
+
+modelsCachePvc:
+  enabled: true
+  # To use the default StorageClass, set storageClassName to null or omit it.
+  # To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name.
+  # To create a PVC that doesn't request any StorageClass, set it to an empty string ("").
+  storageClassName: null
+  accessModes:
+    - ReadWriteOnce
+  size: 10Gi
diff --git a/ai/ai-starter-kit/requirements.txt b/ai/ai-starter-kit/requirements.txt
deleted file mode 100644
index aed0c7b2..00000000
--- a/ai/ai-starter-kit/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-ray
-mlflow
\ No newline at end of file
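
For reference, a minimal sketch of how a notebook in a singleuser pod might reuse the preloaded weights, assuming the chart defaults introduced in this patch (the models-cache PVC mounted at /tmp/models-cache and Qwen/Qwen3-Embedding-0.6B listed in download_models.py); the fallback to the Hub is illustrative and not part of the chart:

    # Sketch: load from the PVC-backed cache populated by download_models.py.
    # Assumes the default mount path and model list from this patch; falls back
    # to downloading from the Hub if the cache has not been populated yet.
    from pathlib import Path
    from transformers import AutoModel, AutoTokenizer

    model_name = "Qwen/Qwen3-Embedding-0.6B"
    cached_dir = Path("/tmp/models-cache") / model_name

    source = str(cached_dir) if cached_dir.is_dir() else model_name
    tokenizer = AutoTokenizer.from_pretrained(source)
    model = AutoModel.from_pretrained(source)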