From c27000c787468945bf6b03cf4909847e88e1563b Mon Sep 17 00:00:00 2001
From: Nikita Aleksandrov
Date: Tue, 29 Jul 2025 11:47:46 +0200
Subject: [PATCH] add sample notebook preload

---
 .../ai-starter-kit/files/requirements.txt     |   3 +-
 .../ai-starter-kit/files/welcome.ipynb        | 104 ++++++++++++++++++
 .../ai-starter-kit/templates/configmaps.yaml  |  14 ++-
 .../helm-chart/ai-starter-kit/values.yaml     |  11 ++
 4 files changed, 130 insertions(+), 2 deletions(-)
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
index 0ecae476..511ba1bd 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
@@ -1,4 +1,5 @@
 transformers
 torch
 huggingface_hub
-pathlib
\ No newline at end of file
+numpy
+ipywidgets
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb
new file mode 100644
index 00000000..8c900abc
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb
@@ -0,0 +1,104 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8048aa56-4549-4afa-b8b0-d111cc7020c3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0.7645573019981384, 0.14142529666423798], [0.13549786806106567, 0.5999557375907898]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Requires transformers>=4.51.0\n",
+    "\n",
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "\n",
+    "from torch import Tensor\n",
+    "from transformers import AutoTokenizer, AutoModel\n",
+    "\n",
+    "\n",
+    "def last_token_pool(last_hidden_states: Tensor,\n",
+    "                 attention_mask: Tensor) -> Tensor:\n",
+    "    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])\n",
+    "    if left_padding:\n",
+    "        return last_hidden_states[:, -1]\n",
+    "    else:\n",
+    "        sequence_lengths = attention_mask.sum(dim=1) - 1\n",
+    "        batch_size = last_hidden_states.shape[0]\n",
+    "        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]\n",
+    "\n",
+    "\n",
+    "def get_detailed_instruct(task_description: str, query: str) -> str:\n",
+    "    return f'Instruct: {task_description}\\nQuery:{query}'\n",
+    "\n",
+    "# Each query must come with a one-sentence instruction that describes the task\n",
+    "task = 'Given a web search query, retrieve relevant passages that answer the query'\n",
+    "\n",
+    "queries = [\n",
+    "    get_detailed_instruct(task, 'What is the capital of China?'),\n",
+    "    get_detailed_instruct(task, 'Explain gravity')\n",
+    "]\n",
+    "# No need to add instruction for retrieval documents\n",
+    "documents = [\n",
+    "    \"The capital of China is Beijing.\",\n",
+    "    \"Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.\"\n",
+    "]\n",
+    "input_texts = queries + documents\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained('/tmp/models-cache/Qwen/Qwen3-Embedding-0.6B', padding_side='left')\n",
+    "model = AutoModel.from_pretrained('/tmp/models-cache/Qwen/Qwen3-Embedding-0.6B')\n",
+    "\n",
+    "# We recommend enabling flash_attention_2 for better acceleration and memory saving.\n",
+    "# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B', attn_implementation=\"flash_attention_2\", torch_dtype=torch.float16).cuda()\n",
+    "\n",
+    "max_length = 8192\n",
+    "\n",
+    "# Tokenize the input texts\n",
+    "batch_dict = tokenizer(\n",
+    "    input_texts,\n",
+    "    padding=True,\n",
+    "    truncation=True,\n",
+    "    max_length=max_length,\n",
+    "    return_tensors=\"pt\",\n",
+    ")\n",
+    "batch_dict.to(model.device)\n",
+    "outputs = model(**batch_dict)\n",
+    "embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])\n",
+    "\n",
+    "# normalize embeddings\n",
+    "embeddings = F.normalize(embeddings, p=2, dim=1)\n",
+    "scores = (embeddings[:2] @ embeddings[2:].T)\n",
+    "print(scores.tolist())\n",
+    "# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
index 23ab3040..0eb760b6 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
@@ -26,4 +26,16 @@ metadata:
     helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 data:
   download_models.py: |-
-{{ .Files.Get "files/download_models.py" | nindent 4 }}
\ No newline at end of file
+{{ .Files.Get "files/download_models.py" | nindent 4 }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-welcome-notebook
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:
+  welcome.ipynb: |-
+{{ .Files.Get "files/welcome.ipynb" | nindent 4 }}
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index ea46acc1..b28baa3e 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -8,6 +8,11 @@ jupyterhub:
 
   singleuser:
     fsGid: 100
+    defaultUrl: "/lab/tree/welcome.ipynb"
+    lifecycleHooks:
+      postStart:
+        exec:
+          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
     initContainers:
       - name: model-initializer
         image: python:3.9-slim-bullseye
@@ -47,6 +52,9 @@ jupyterhub:
       - name: hf-download-script
         configMap:
           name: "{{ .Release.Name }}-hf-download-script"
+      - name: welcome-notebook
+        configMap:
+          name: "{{ .Release.Name }}-welcome-notebook"
       - name: hf-token-secret
         secret:
           secretName: "{{ .Release.Name }}-hf-token-secret"
@@ -60,6 +68,9 @@ jupyterhub:
       - name: hf-download-script
         mountPath: /tmp/download_models.py
         subPath: download_models.py
+      - name: welcome-notebook
+        mountPath: /tmp/welcome.ipynb
+        subPath: welcome.ipynb
       - name: hf-token-secret
         mountPath: "/etc/secrets/huggingface"
         readOnly: true
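
Note, as a hedged sketch rather than part of the patch above: defaultUrl "/lab/tree/welcome.ipynb" resolves against the single-user server's notebook root (normally the home directory), while the ConfigMap is mounted read-only at /tmp/welcome.ipynb. One way to make the landing page resolve is to let the same postStart hook copy the notebook into the home directory before installing the requirements; the /home/jovyan path below is an assumption about the jupyter/docker-stacks single-user image and would need to match whatever image the chart actually uses.

jupyterhub:
  singleuser:
    lifecycleHooks:
      postStart:
        exec:
          # Copy the preloaded notebook into the writable home directory
          # (so JupyterLab does not try to save over the read-only ConfigMap
          # mount), then install the extra Python packages.
          # /home/jovyan is an assumed home path, not confirmed by the chart.
          command:
            - /bin/sh
            - -c
            - cp /tmp/welcome.ipynb /home/jovyan/welcome.ipynb && pip install -r /tmp/requirements.txt

Keeping the copy in the existing postStart hook avoids adding another init container and leaves the ConfigMap mount itself unchanged.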