From c27000c787468945bf6b03cf4909847e88e1563b Mon Sep 17 00:00:00 2001
From: Nikita Aleksandrov
Date: Tue, 29 Jul 2025 11:47:46 +0200
Subject: [PATCH] add sample notebook preload

---
 .../ai-starter-kit/files/requirements.txt     |   3 +-
 .../ai-starter-kit/files/welcome.ipynb        | 104 ++++++++++++++++++
 .../ai-starter-kit/templates/configmaps.yaml  |  14 ++-
 .../helm-chart/ai-starter-kit/values.yaml     |  11 ++
 4 files changed, 130 insertions(+), 2 deletions(-)
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
index 0ecae476..511ba1bd 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
@@ -1,4 +1,5 @@
 transformers
 torch
 huggingface_hub
-pathlib
\ No newline at end of file
+numpy
+ipywidgets
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb
new file mode 100644
index 00000000..8c900abc
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb
@@ -0,0 +1,104 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8048aa56-4549-4afa-b8b0-d111cc7020c3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0.7645573019981384, 0.14142529666423798], [0.13549786806106567, 0.5999557375907898]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Requires transformers>=4.51.0\n",
+    "\n",
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "\n",
+    "from torch import Tensor\n",
+    "from transformers import AutoTokenizer, AutoModel\n",
+    "\n",
+    "\n",
+    "def last_token_pool(last_hidden_states: Tensor,\n",
+    "                 attention_mask: Tensor) -> Tensor:\n",
+    "    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])\n",
+    "    if left_padding:\n",
+    "        return last_hidden_states[:, -1]\n",
+    "    else:\n",
+    "        sequence_lengths = attention_mask.sum(dim=1) - 1\n",
+    "        batch_size = last_hidden_states.shape[0]\n",
+    "        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]\n",
+    "\n",
+    "\n",
+    "def get_detailed_instruct(task_description: str, query: str) -> str:\n",
+    "    return f'Instruct: {task_description}\\nQuery:{query}'\n",
+    "\n",
+    "# Each query must come with a one-sentence instruction that describes the task\n",
+    "task = 'Given a web search query, retrieve relevant passages that answer the query'\n",
+    "\n",
+    "queries = [\n",
+    "    get_detailed_instruct(task, 'What is the capital of China?'),\n",
+    "    get_detailed_instruct(task, 'Explain gravity')\n",
+    "]\n",
+    "# No need to add instruction for retrieval documents\n",
+    "documents = [\n",
+    "    \"The capital of China is Beijing.\",\n",
+    "    \"Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.\"\n",
+    "]\n",
+    "input_texts = queries + documents\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained('/tmp/models-cache/Qwen/Qwen3-Embedding-0.6B', padding_side='left')\n",
+    "model = AutoModel.from_pretrained('/tmp/models-cache/Qwen/Qwen3-Embedding-0.6B')\n",
+    "\n",
+    "# We recommend enabling flash_attention_2 for better acceleration and memory saving.\n",
+    "# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B', attn_implementation=\"flash_attention_2\", torch_dtype=torch.float16).cuda()\n",
+    "\n",
+    "max_length = 8192\n",
+    "\n",
+    "# Tokenize the input texts\n",
+    "batch_dict = tokenizer(\n",
+    "    input_texts,\n",
+    "    padding=True,\n",
+    "    truncation=True,\n",
+    "    max_length=max_length,\n",
+    "    return_tensors=\"pt\",\n",
+    ")\n",
+    "batch_dict.to(model.device)\n",
+    "outputs = model(**batch_dict)\n",
+    "embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])\n",
+    "\n",
+    "# normalize embeddings\n",
+    "embeddings = F.normalize(embeddings, p=2, dim=1)\n",
+    "scores = (embeddings[:2] @ embeddings[2:].T)\n",
+    "print(scores.tolist())\n",
+    "# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
index 23ab3040..0eb760b6 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
@@ -26,4 +26,16 @@ metadata:
     helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 data:
   download_models.py: |-
-{{ .Files.Get "files/download_models.py" | nindent 4 }}
\ No newline at end of file
+{{ .Files.Get "files/download_models.py" | nindent 4 }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-welcome-notebook
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:
+  welcome.ipynb: |-
+{{ .Files.Get "files/welcome.ipynb" | nindent 4 }}
\ No newline at end of file
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index ea46acc1..b28baa3e 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -8,6 +8,11 @@ jupyterhub:
 
   singleuser:
     fsGid: 100
+    defaultUrl: "/lab/tree/welcome.ipynb"
+    lifecycleHooks:
+      postStart:
+        exec:
+          command: ["/bin/sh", "-c", "pip install -r /tmp/requirements.txt"]
     initContainers:
       - name: model-initializer
         image: python:3.9-slim-bullseye
@@ -47,6 +52,9 @@ jupyterhub:
       - name: hf-download-script
         configMap:
           name: "{{ .Release.Name }}-hf-download-script"
+      - name: welcome-notebook
+        configMap:
+          name: "{{ .Release.Name }}-welcome-notebook"
       - name: hf-token-secret
         secret:
           secretName: "{{ .Release.Name }}-hf-token-secret"
@@ -60,6 +68,9 @@ jupyterhub:
       - name: hf-download-script
         mountPath: /tmp/download_models.py
         subPath: download_models.py
+      - name: welcome-notebook
+        mountPath: /tmp/welcome.ipynb
+        subPath: welcome.ipynb
       - name: hf-token-secret
         mountPath: "/etc/secrets/huggingface"
         readOnly: true
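
Note, as a hedged sketch rather than part of the patch above: defaultUrl "/lab/tree/welcome.ipynb" resolves against the single-user server's notebook root (normally the home directory), while the ConfigMap is mounted read-only at /tmp/welcome.ipynb. One way to make the landing page resolve is to let the same postStart hook copy the notebook into the home directory before installing the requirements; the /home/jovyan path below is an assumption about the jupyter/docker-stacks single-user image and would need to match whatever image the chart actually uses.

jupyterhub:
  singleuser:
    lifecycleHooks:
      postStart:
        exec:
          # Copy the preloaded notebook into the writable home directory
          # (so JupyterLab does not try to save over the read-only ConfigMap
          # mount), then install the extra Python packages.
          # /home/jovyan is an assumed home path, not confirmed by the chart.
          command:
            - /bin/sh
            - -c
            - cp /tmp/welcome.ipynb /home/jovyan/welcome.ipynb && pip install -r /tmp/requirements.txt

Keeping the copy in the existing postStart hook avoids adding another init container and leaves the ConfigMap mount itself unchanged.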