Merge pull request #2 from volatilemolotov/models-preload

implement models preloading and reusing
2025-07-21 14:39:59 +02:00 · 2025-07-21 14:39:59 +02:00 · 6a5c72becc
parent 9ef2b017db 9c5faaf338
commit 6a5c72becc
8 changed files with 161 additions and 9 deletions
--- a/ai/ai-starter-kit/Makefile
+++ b/ai/ai-starter-kit/Makefile
@ -5,7 +5,7 @@ dep_update:
 	helm dependency update helm-chart/ai-starter-kit

 install:
-	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --timeout 10m
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="your_hf_token" --timeout 10m

 start:
 	minikube start --cpus 4 --memory 8192
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
@ -0,0 +1,44 @@
+from huggingface_hub import login
+from pathlib import Path
+from transformers import AutoModel, AutoTokenizer
+
+# Log in to the Hugging Face Hub when a token file has been mounted into the
+# pod; otherwise carry on anonymously.
+TOKEN_PATH = Path("/etc/secrets/huggingface/token")
+if not TOKEN_PATH.is_file():
+    print("Hugging Face token not found. Proceeding without login.")
+    print("Downloads for private or gated models may fail.")
+else:
+    print("Hugging Face token file found.")
+    try:
+        token = TOKEN_PATH.read_text().strip()
+        if not token:
+            print("Token file is empty. Proceeding without login.")
+        else:
+            print("Logging into Hugging Face Hub...")
+            login(token=token)
+            print("Login successful.")
+    except Exception as err:
+        # Best effort: a read or login failure is reported, not raised.
+        print(f"Failed to read token or login: {err}")
+
+
+# --- Model Download ---
+# List your desired Hugging Face model names here
+model_names = [
+    "Qwen/Qwen3-Embedding-0.6B",
+]
+
+# The cache directory is mounted from a PersistentVolumeClaim
+save_base_dir = "/tmp/models-cache"
+
+# Marker file written only after both the model and the tokenizer were saved,
+# so an interrupted download is retried instead of being taken for a full copy.
+COMPLETE_MARKER = ".download_complete"
+
+for model_name in model_names:
+    save_dir = Path(save_base_dir) / model_name
+    marker = save_dir / COMPLETE_MARKER
+
+    # Reuse what an earlier run already stored in the cache directory.
+    if marker.is_file():
+        print(f"--- {model_name} already cached in {save_dir}, skipping ---")
+        continue
+
+    print(f"--- Downloading {model_name} ---")
+    try:
+        model = AutoModel.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model.save_pretrained(save_dir)
+        tokenizer.save_pretrained(save_dir)
+        marker.touch()
+        print(f"Successfully cached {model_name} in {save_dir}")
+    except Exception as e:
+        # Best effort: report the failure and continue with the next model.
+        print(f"Failed to download {model_name}. Error: {e}")
+
+print("--- Model download process finished. ---")
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
@ -0,0 +1,4 @@
+# pathlib is part of the Python 3 standard library; the PyPI package of that
+# name is an obsolete backport, so it is intentionally not listed here.
+transformers
+torch
+huggingface_hub
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
@ -0,0 +1,29 @@
+{{- /*
+Create ConfigMaps for jupyterhub singleuser pods.
+These ConfigMaps are mounted as volumes.
+
+The file contents are embedded with a left-trimmed nindent so the block scalar
+starts directly with the first line of the file instead of a blank line.
+*/}}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-requirements-txt
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:
+  requirements.txt: |-
+    {{- .Files.Get "files/requirements.txt" | nindent 4 }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-hf-download-script
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:
+  download_models.py: |-
+    {{- .Files.Get "files/download_models.py" | nindent 4 }}
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
@ -0,0 +1,13 @@
+{{- /*
+Secret holding the Hugging Face access token.
+Rendered only when huggingface.token is non-empty. The value is quoted so a
+token that looks like a number or contains YAML-special characters stays a string.
+*/}}
+{{- if .Values.huggingface.token }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ .Release.Name }}-hf-token-secret
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+type: Opaque
+stringData:
+  token: {{ .Values.huggingface.token | quote }}
+{{- end }}
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
@ -0,0 +1,24 @@
+{{- /*
+PersistentVolumeClaim for the models cache.
+Rendered only when modelsCachePvc.enabled is true.
+*/}}
+{{- if .Values.modelsCachePvc.enabled -}}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ .Release.Name }}-models-cache-pvc
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+spec:
+  accessModes:
+    {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }}
+  {{- /*
+    If storageClassName is set to a specific class, it will be used.
+    If storageClassName is set to an empty string (""), no storage class will be used for provisioning.
+    If storageClassName is null or omitted, the field is left out and the default storage class will be used.
+    kindIs "string" covers both string cases without comparing a null value
+    via eq, which is not portable across Go template versions.
+  */}}
+  {{- if kindIs "string" .Values.modelsCachePvc.storageClassName }}
+  storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }}
+  {{- end }}
+  resources:
+    requests:
+      storage: {{ .Values.modelsCachePvc.size | quote }}
+{{- end -}}
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@ -7,29 +7,53 @@ jupyterhub:
      password: "changeme"

  singleuser:
+    fsGid: 100
    lifecycleHooks:
      postStart:
        exec:
-          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
+          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt && python /tmp/download_models.py"]
    extraVolumes:
      - name: requirements-txt
        configMap:
-          name: requirements-txt
+          name: "{{ .Release.Name }}-requirements-txt"
+      - name: models-cache
+        persistentVolumeClaim:
+          claimName: "{{ .Release.Name }}-models-cache-pvc"
+      - name: hf-download-script
+        configMap:
+          name: "{{ .Release.Name }}-hf-download-script"
+      - name: hf-token-secret
+        secret:
+          secretName: "{{ .Release.Name }}-hf-token-secret"
+          optional: true
    extraVolumeMounts:
      - name: requirements-txt
        mountPath: /tmp/requirements.txt
        subPath: requirements.txt
+      - name: models-cache
+        mountPath: /tmp/models-cache
+      - name: hf-download-script
+        mountPath: /tmp/download_models.py
+        subPath: download_models.py
+      - name: hf-token-secret
+        mountPath: "/etc/secrets/huggingface"
+        readOnly: true
    extraEnvVars:
-        RAY_ADDRESS: "{{ tpl .Release.Name $ }}-kuberay-head-svc:6379"
-        MLFLOW_TRACKING_URI: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
+        RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
+        MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
        # JUPYTERLAB_DIR: "/opt"
+    resources:
+      limits:
+        memory: 16Gi
+      requests:
+        memory: 4Gi
  hub:
    password: "sneakypass"
    extraEnvVars:
        - name: "RAY_ADDRESS"
-          value: "{{ tpl .Release.Name $ }}-kuberay-head-svc"
+          value: "{{ .Release.Name }}-kuberay-head-svc"
        - name: "MLFLOW_TRACKING_URI"
-          value: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
+          value: "http://{{ .Release.Name }}-mlflow-tracking"

 ray-cluster:
  head:
@ -49,3 +73,19 @@ mlflow:
    auth:
      password: "changemeibegyou"
      flaskServerSecretKey: "noneedtochangethisone"
+
+# Hugging Face Hub credentials.
+huggingface:
+  # Provide your Hugging Face token here to download gated or private models.
+  # It is recommended to set this via --set or a separate values file, e.g.,
+  # --set huggingface.token=hf_...
+  # When non-empty, the chart renders the "<release>-hf-token-secret" Secret;
+  # when empty, no Secret is created.
+  token: ""
+
+# PersistentVolumeClaim "<release>-models-cache-pvc" used as the models cache.
+modelsCachePvc:
+  # Set to false to skip creating the claim.
+  # NOTE(review): jupyterhub.singleuser.extraVolumes references this claim
+  # unconditionally; confirm singleuser pods still start when this is false.
+  enabled: true
+  # To use the default StorageClass, set storageClassName to null or omit it.
+  # To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name.
+  # To create a PVC that doesn't request any StorageClass, set it to an empty string ("").
+  storageClassName: null
+  # Access modes requested for the claim.
+  accessModes:
+    - ReadWriteOnce
+  # Requested storage capacity.
+  size: 10Gi
--- a/ai/ai-starter-kit/requirements.txt
+++ b/ai/ai-starter-kit/requirements.txt
@ -1,2 +0,0 @@
-ray
-mlflow