From 9c5faaf3383fbbc0fc9a52ef62bafb356b06855f Mon Sep 17 00:00:00 2001
From: Nikita Aleksandrov
Date: Mon, 21 Jul 2025 14:15:37 +0200
Subject: [PATCH] implement models preloading and reusing

---
 ai/ai-starter-kit/Makefile                         |  2 +-
 .../ai-starter-kit/files/download_models.py        | 44 ++++++++++++++++
 .../ai-starter-kit/files/requirements.txt          |  4 ++
 .../ai-starter-kit/templates/configmaps.yaml       | 29 +++++++++++
 .../ai-starter-kit/templates/hf-secret.yaml        | 13 +++++
 .../ai-starter-kit/templates/pvc.yaml              | 24 +++++++++
 .../helm-chart/ai-starter-kit/values.yaml          | 52 ++++++++++++++++---
 ai/ai-starter-kit/requirements.txt                 |  2 -
 8 files changed, 161 insertions(+), 9 deletions(-)
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
 delete mode 100644 ai/ai-starter-kit/requirements.txt

diff --git a/ai/ai-starter-kit/Makefile b/ai/ai-starter-kit/Makefile
index 00f3e860..4306b3fa 100644
--- a/ai/ai-starter-kit/Makefile
+++ b/ai/ai-starter-kit/Makefile
@@ -5,7 +5,7 @@ dep_update:
 	helm dependency update helm-chart/ai-starter-kit
 
 install:
-	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --timeout 10m
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m
 
 start:
 	minikube start --cpus 4 --memory 8192
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
new file mode 100644
index 00000000..2766f3f1
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
@@ -0,0 +1,44 @@
+from huggingface_hub import login
+from pathlib import Path
+from transformers import AutoModel, AutoTokenizer
+
+TOKEN_PATH = Path("/etc/secrets/huggingface/token")
+if TOKEN_PATH.is_file():
+    print("Hugging Face token file found.")
+    try:
+        token = TOKEN_PATH.read_text().strip()
+        if token:
+            print("Logging into Hugging Face Hub...")
+            login(token=token)
+            print("Login successful.")
+        else:
+            print("Token file is empty. Proceeding without login.")
+    except Exception as e:
+        print(f"Failed to read token or login: {e}")
+else:
+    print("Hugging Face token not found. Proceeding without login.")
+    print("Downloads for private or gated models may fail.")
+
+
+# --- Model Download ---
+# List your desired Hugging Face model names here
+model_names = [
+    "Qwen/Qwen3-Embedding-0.6B",
+]
+
+# The cache directory is mounted from a PersistentVolumeClaim
+save_base_dir = "/tmp/models-cache"
+
+for model_name in model_names:
+    print(f"--- Downloading {model_name} ---")
+    try:
+        model = AutoModel.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        save_dir = f"{save_base_dir}/{model_name}"
+        model.save_pretrained(save_dir)
+        tokenizer.save_pretrained(save_dir)
+        print(f"Successfully cached {model_name} in {save_base_dir}")
+    except Exception as e:
+        print(f"Failed to download {model_name}. Error: {e}")
+
+print("--- Model download process finished. ---")
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
new file mode 100644
index 00000000..0ecae476
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
@@ -0,0 +1,4 @@
+transformers
+torch
+huggingface_hub
+pathlib
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
new file mode 100644
index 00000000..23ab3040
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
@@ -0,0 +1,29 @@
+---
+{{- /*
+Create ConfigMaps for jupyterhub singleuser pods.
+These ConfigMaps are mounted as volumes.
+*/ -}}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-requirements-txt
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:
+  requirements.txt: |-
+{{ .Files.Get "files/requirements.txt" | nindent 4 }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-hf-download-script
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:
+  download_models.py: |-
+{{ .Files.Get "files/download_models.py" | nindent 4 }}
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
new file mode 100644
index 00000000..308b0a94
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.huggingface.token }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ .Release.Name }}-hf-token-secret
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+type: Opaque
+stringData:
+  token: {{ .Values.huggingface.token }}
+{{- end }}
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
new file mode 100644
index 00000000..8aa70eaf
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
@@ -0,0 +1,24 @@
+{{- if .Values.modelsCachePvc.enabled -}}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ .Release.Name }}-models-cache-pvc
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+spec:
+  accessModes:
+    {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }}
+  {{- /*
+  If storageClassName is set to a specific class, it will be used.
+  If storageClassName is set to an empty string (""), no storage class will be used for provisioning.
+  If storageClassName is null or omitted, the default storage class will be used.
+  */}}
+  {{- if or .Values.modelsCachePvc.storageClassName (eq .Values.modelsCachePvc.storageClassName "") }}
+  storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }}
+  {{- end }}
+  resources:
+    requests:
+      storage: {{ .Values.modelsCachePvc.size }}
+{{- end -}}
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index 0ab044a1..66820724 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -7,29 +7,53 @@ jupyterhub:
     password: "changeme"
 
   singleuser:
+    fsGid: 100
     lifecycleHooks:
       postStart:
         exec:
-          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
+          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt && python /tmp/download_models.py"]
     extraVolumes:
       - name: requirements-txt
         configMap:
-          name: requirements-txt
+          name: "{{ .Release.Name }}-requirements-txt"
+      - name: models-cache
+        persistentVolumeClaim:
+          claimName: "{{ .Release.Name }}-models-cache-pvc"
+      - name: hf-download-script
+        configMap:
+          name: "{{ .Release.Name }}-hf-download-script"
+      - name: hf-token-secret
+        secret:
+          secretName: "{{ .Release.Name }}-hf-token-secret"
+          optional: true
     extraVolumeMounts:
       - name: requirements-txt
         mountPath: /tmp/requirements.txt
         subPath: requirements.txt
+      - name: models-cache
+        mountPath: /tmp/models-cache
+      - name: hf-download-script
+        mountPath: /tmp/download_models.py
+        subPath: download_models.py
+      - name: hf-token-secret
+        mountPath: "/etc/secrets/huggingface"
+        readOnly: true
     extraEnvVars:
-      RAY_ADDRESS: "{{ tpl .Release.Name $ }}-kuberay-head-svc:6379"
-      MLFLOW_TRACKING_URI: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
+      RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
+      MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
       # JUPYTERLAB_DIR: "/opt"
+    resources:
+      limits:
+        memory: 16Gi
+      requests:
+        memory: 4Gi
   hub:
     password: "sneakypass"
     extraEnvVars:
       - name: "RAY_ADDRESS"
-        value: "{{ tpl .Release.Name $ }}-kuberay-head-svc"
+        value: "{{ .Release.Name }}-kuberay-head-svc"
       - name: "MLFLOW_TRACKING_URI"
-        value: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
+        value: "http://{{ .Release.Name }}-mlflow-tracking"
 
 ray-cluster:
   head:
@@ -49,3 +73,19 @@ mlflow:
   auth:
     password: "changemeibegyou"
     flaskServerSecretKey: "noneedtochangethisone"
+
+huggingface:
+  # Provide your Hugging Face token here to download gated or private models.
+  # It is recommended to set this via --set or a separate values file, e.g.,
+  # --set huggingface.token=hf_...
+  token: ""
+
+modelsCachePvc:
+  enabled: true
+  # To use the default StorageClass, set storageClassName to null or omit it.
+  # To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name.
+  # To create a PVC that doesn't request any StorageClass, set it to an empty string ("").
+  storageClassName: null
+  accessModes:
+    - ReadWriteOnce
+  size: 10Gi
diff --git a/ai/ai-starter-kit/requirements.txt b/ai/ai-starter-kit/requirements.txt
deleted file mode 100644
index aed0c7b2..00000000
--- a/ai/ai-starter-kit/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-ray
-mlflow
\ No newline at end of file
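
For reference, a minimal sketch of how a notebook in a singleuser pod might reuse the preloaded weights, assuming the chart defaults introduced in this patch (the models-cache PVC mounted at /tmp/models-cache and Qwen/Qwen3-Embedding-0.6B listed in download_models.py); the fallback to the Hub is illustrative and not part of the chart:

    # Sketch: load from the PVC-backed cache populated by download_models.py.
    # Assumes the default mount path and model list from this patch; falls back
    # to downloading from the Hub if the cache has not been populated yet.
    from pathlib import Path
    from transformers import AutoModel, AutoTokenizer

    model_name = "Qwen/Qwen3-Embedding-0.6B"
    cached_dir = Path("/tmp/models-cache") / model_name

    source = str(cached_dir) if cached_dir.is_dir() else model_name
    tokenizer = AutoTokenizer.from_pretrained(source)
    model = AutoModel.from_pretrained(source)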