implement models preloading and reusing

2025-07-21 14:15:37 +02:00 · 2025-07-21 14:15:37 +02:00 · 9c5faaf338
parent 03b2df17b0
commit 9c5faaf338
8 changed files with 161 additions and 9 deletions
--- a/ai/ai-starter-kit/Makefile
+++ b/ai/ai-starter-kit/Makefile
@ -5,7 +5,7 @@ dep_update:
 	helm dependency update helm-chart/ai-starter-kit
 install:
+# HF_TOKEN is taken from the environment or the make command line (make install HF_TOKEN=hf_...).
+# When it is unset the token value is empty, so the chart's token Secret is not rendered.
-	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --timeout 10m
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$(HF_TOKEN)" --timeout 10m
 start:
 	minikube start --cpus 4 --memory 8192
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py
@ -0,0 +1,44 @@
 from huggingface_hub import login
 from pathlib import Path
 from transformers import AutoModel, AutoTokenizer
 # --- Hugging Face Login ---
 # Log in only when a non-empty token file is present. Every other outcome is
 # reported on stdout and the script carries on without authentication.
 TOKEN_PATH = Path("/etc/secrets/huggingface/token")
 if not TOKEN_PATH.is_file():
    print("Hugging Face token not found. Proceeding without login.")
    print("Downloads for private or gated models may fail.")
 else:
    print("Hugging Face token file found.")
    try:
        token = TOKEN_PATH.read_text().strip()
        if not token:
            print("Token file is empty. Proceeding without login.")
        else:
            print("Logging into Hugging Face Hub...")
            login(token=token)
            print("Login successful.")
    except Exception as exc:
        # Best effort: a failed read or login must not abort the script.
        print(f"Failed to read token or login: {exc}")
 # --- Model Download ---
 # List your desired Hugging Face model names here
 model_names = [
    "Qwen/Qwen3-Embedding-0.6B",
 ]
 # The cache directory is mounted from a PersistentVolumeClaim
 save_base_dir = "/tmp/models-cache"
 # Files written by the model and tokenizer save_pretrained() calls. Finding
 # both means an earlier run got as far as saving the tokenizer, which is the
 # last step for a model.
 cache_markers = ("config.json", "tokenizer_config.json")
 for model_name in model_names:
    save_dir = f"{save_base_dir}/{model_name}"
    # The cache volume can outlive the process, so reuse a finished copy
    # instead of downloading and rewriting the model on every run.
    if all(Path(save_dir, marker).is_file() for marker in cache_markers):
        print(f"--- {model_name} already cached in {save_dir}, skipping download ---")
        continue
    print(f"--- Downloading {model_name} ---")
    try:
        model = AutoModel.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model.save_pretrained(save_dir)
        tokenizer.save_pretrained(save_dir)
        print(f"Successfully cached {model_name} in {save_base_dir}")
    except Exception as e:
        # Best effort: report the failure and continue with the next model.
        print(f"Failed to download {model_name}. Error: {e}")
 print("--- Model download process finished. ---")
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
@ -0,0 +1,4 @@
 transformers
 torch
 huggingface_hub
 # pathlib is part of the Python 3 standard library; the PyPI package of that name is an obsolete backport, so it is not listed.
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
@ -0,0 +1,29 @@
 ---
 {{- /*
 Create ConfigMaps for jupyterhub singleuser pods.
 These ConfigMaps are mounted as volumes.
 */ -}}
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
  # Prefixed with the release name so several releases can coexist in one namespace.
  name: {{ .Release.Name }}-requirements-txt
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 data:
  # Contents of the chart file files/requirements.txt, indented to sit inside the block scalar.
  requirements.txt: |-
 {{ .Files.Get "files/requirements.txt" | nindent 4 }}
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
  # Prefixed with the release name so several releases can coexist in one namespace.
  name: {{ .Release.Name }}-hf-download-script
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 data:
  # Contents of the chart file files/download_models.py, indented to sit inside the block scalar.
  download_models.py: |-
 {{ .Files.Get "files/download_models.py" | nindent 4 }}
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml
@ -0,0 +1,13 @@
 {{- if .Values.huggingface.token }}
 # Rendered only when huggingface.token is non-empty; stores the token under the "token" key.
 apiVersion: v1
 kind: Secret
 metadata:
  name: {{ .Release.Name }}-hf-token-secret
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 type: Opaque
 stringData:
  # Quoted so the value is always emitted as a YAML string, whatever characters it contains.
  token: {{ .Values.huggingface.token | quote }}
 {{- end }}
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml
@ -0,0 +1,24 @@
 {{- if .Values.modelsCachePvc.enabled -}}
 # PersistentVolumeClaim for the models cache; rendered only when modelsCachePvc.enabled is set.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: {{ .Release.Name }}-models-cache-pvc
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 spec:
  # Access modes are copied as-is from modelsCachePvc.accessModes.
  accessModes:
    {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }}
  {{- /*
    If storageClassName is set to a specific class, it will be used.
    If storageClassName is set to an empty string (""), no storage class will be used for provisioning.
    If storageClassName is null or omitted, the default storage class will be used.
  */}}
  {{- if or .Values.modelsCachePvc.storageClassName (eq .Values.modelsCachePvc.storageClassName "") }}
  storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }}
  {{- end }}
  # Requested capacity comes from modelsCachePvc.size.
  resources:
    requests:
      storage: {{ .Values.modelsCachePvc.size }}
 {{- end -}}
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@ -7,29 +7,53 @@ jupyterhub:
      password: "changeme"
  singleuser:
    fsGid: 100
    lifecycleHooks:
      postStart:
        exec:
-          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
+          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt && python /tmp/download_models.py"]
    extraVolumes:
      - name: requirements-txt
        configMap:
-          name: requirements-txt
+          name: "{{ .Release.Name }}-requirements-txt"
      - name: models-cache
        persistentVolumeClaim:
          claimName: "{{ .Release.Name }}-models-cache-pvc"
      - name: hf-download-script
        configMap:
          name: "{{ .Release.Name }}-hf-download-script"
      - name: hf-token-secret
        secret:
          secretName: "{{ .Release.Name }}-hf-token-secret"
          optional: true
    extraVolumeMounts:
      - name: requirements-txt
        mountPath: /tmp/requirements.txt
        subPath: requirements.txt
      - name: models-cache
        mountPath: /tmp/models-cache
      - name: hf-download-script
        mountPath: /tmp/download_models.py
        subPath: download_models.py
      - name: hf-token-secret
        mountPath: "/etc/secrets/huggingface"
        readOnly: true
    extraEnvVars:
-        RAY_ADDRESS: "{{ tpl .Release.Name $ }}-kuberay-head-svc:6379"
+        RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
-        MLFLOW_TRACKING_URI: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
+        MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
        # JUPYTERLAB_DIR: "/opt"
    resources:
      limits:
        memory: 16Gi
      requests:
        memory: 4Gi
  hub:
    password: "sneakypass"
    extraEnvVars:
        - name: "RAY_ADDRESS"
-          value: "{{ tpl .Release.Name $ }}-kuberay-head-svc"
+          value: "{{ .Release.Name }}-kuberay-head-svc"
        - name: "MLFLOW_TRACKING_URI"
-          value: "http://{{ tpl .Release.Name $ }}-mlflow-tracking"
+          value: "http://{{ .Release.Name }}-mlflow-tracking"
 ray-cluster:
  head:
@ -49,3 +73,19 @@ mlflow:
    auth:
      password: "changemeibegyou"
      flaskServerSecretKey: "noneedtochangethisone"
 huggingface:
  # Provide your Hugging Face token here to download gated or private models.
  # It is recommended to set this via --set or a separate values file, e.g.,
  # --set huggingface.token=hf_...
  # When left empty, the chart does not render the token Secret and the
  # download script proceeds without logging in.
  token: ""
 modelsCachePvc:
  # When true, the chart renders the <release>-models-cache-pvc PersistentVolumeClaim.
  enabled: true
  # To use the default StorageClass, set storageClassName to null or omit it.
  # To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name.
  # To create a PVC that doesn't request any StorageClass, set it to an empty string ("").
  storageClassName: null
  # Copied verbatim into the claim's spec.accessModes.
  accessModes:
    - ReadWriteOnce
  # Storage requested by the claim.
  size: 10Gi
--- a/ai/ai-starter-kit/requirements.txt
+++ b/ai/ai-starter-kit/requirements.txt
@ -1,2 +0,0 @@
 ray
 mlflow