From b71c7007c843993fbd2a3dbc7156010191a1bce5 Mon Sep 17 00:00:00 2001
From: Dima Drogovoz <drogowoz@gmail.com>
Date: Fri, 8 Aug 2025 12:16:39 +0100
Subject: [PATCH 1/6] change kuberay image tag and switch RAY_ADDRESS to ray client

---
 .../helm-chart/ai-starter-kit/values.yaml          | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index 3b80b988..b0c56eaf 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -75,7 +75,7 @@ jupyterhub:
         mountPath: "/etc/secrets/huggingface"
         readOnly: true
     extraEnvVars:
-        RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379"
+        RAY_ADDRESS: "ray://{{ .Release.Name }}-kuberay-head-svc:10001"
         MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking"
         # JUPYTERLAB_DIR: "/opt"
     resources:
@@ -85,13 +85,15 @@ jupyterhub:
         memory: 4Gi
   hub:
     password: "sneakypass"
-    extraEnvVars:
-        - name: "RAY_ADDRESS"
-          value: "{{ .Release.Name }}-kuberay-head-svc"
-        - name: "MLFLOW_TRACKING_URI"
-          value: "http://{{ .Release.Name }}-mlflow-tracking"
+    # extraEnvVars:
+    #     - name: "RAY_ADDRESS"
+    #       value: "{{ .Release.Name }}-kuberay-head-svc"
+    #     - name: "MLFLOW_TRACKING_URI"
+    #       value: "http://{{ .Release.Name }}-mlflow-tracking"
 
 ray-cluster:
+  image:
+    tag: 2.48.0.2c63f6-py312-cpu-aarch64
   head:
     serviceType: ClusterIP
     resources:

From 0e5908d1eb703f6a4bb5a0ae70d53c6745dd8d7b Mon Sep 17 00:00:00 2001
From: Dima Drogovoz <drogowoz@gmail.com>
Date: Fri, 8 Aug 2025 12:18:32 +0100
Subject: [PATCH 2/6] add ray.ipynb

---
 .../helm-chart/ai-starter-kit/files/ray.ipynb | 295 ++++++++++++++++++
 1 file changed, 295 insertions(+)
 create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb
new file mode 100644
index 00000000..daeb6c88
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb
@@ -0,0 +1,295 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "8dd42829-7ccc-4d00-9d5c-b206c93d6586",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Defaulting to user installation because normal site-packages is not writeable\n",
+      "Collecting ray[client,default]\n",
+      "  Downloading ray-2.48.0-cp312-cp312-manylinux2014_aarch64.whl.metadata (19 kB)\n",
+      "Requirement already satisfied: click>=7.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (8.2.1)\n",
+      "Requirement already satisfied: filelock in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (3.18.0)\n",
+      "Requirement already satisfied: jsonschema in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (4.24.0)\n",
+      "Collecting msgpack<2.0.0,>=1.0.0 (from ray[client,default])\n",
+      "  Downloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (8.4 kB)\n",
+      "Requirement already satisfied: packaging in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (25.0)\n",
+      "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (6.31.1)\n",
+      "Requirement already satisfied: pyyaml in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (6.0.2)\n",
+      "Requirement already satisfied: requests in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (2.32.3)\n",
+      "Collecting grpcio (from ray[client,default])\n",
+      "  Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl.metadata (3.8 kB)\n",
+      "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (3.12.12)\n",
+      "Collecting aiohttp_cors (from ray[client,default])\n",
+      "  Downloading aiohttp_cors-0.8.1-py3-none-any.whl.metadata (20 kB)\n",
+      "Collecting colorful (from ray[client,default])\n",
+      "  Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)\n",
+      "Collecting py-spy>=0.4.0 (from ray[client,default])\n",
+      "  Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (510 bytes)\n",
+      "Collecting opencensus (from ray[client,default])\n",
+      "  Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)\n",
+      "Requirement already satisfied: opentelemetry-sdk>=1.30.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (1.36.0)\n",
+      "Collecting opentelemetry-exporter-prometheus (from ray[client,default])\n",
+      "  Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl.metadata (1.8 kB)\n",
+      "Collecting opentelemetry-proto (from ray[client,default])\n",
+      "  Downloading opentelemetry_proto-1.36.0-py3-none-any.whl.metadata (2.3 kB)\n",
+      "Requirement already satisfied: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (2.11.5)\n",
+      "Requirement already satisfied: prometheus_client>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (0.22.1)\n",
+      "Collecting smart_open (from ray[client,default])\n",
+      "  Downloading smart_open-7.3.0.post1-py3-none-any.whl.metadata (24 kB)\n",
+      "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default])\n",
+      "  Downloading virtualenv-20.33.1-py3-none-any.whl.metadata (4.5 kB)\n",
+      "Requirement already satisfied: annotated-types>=0.6.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (0.7.0)\n",
+      "Requirement already satisfied: pydantic-core==2.33.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (2.33.2)\n",
+      "Requirement already satisfied: typing-extensions>=4.12.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (4.14.0)\n",
+      "Requirement already satisfied: typing-inspection>=0.4.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (0.4.1)\n",
+      "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (2.6.1)\n",
+      "Requirement already satisfied: aiosignal>=1.1.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.3.2)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (25.3.0)\n",
+      "Requirement already satisfied: frozenlist>=1.1.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.7.0)\n",
+      "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (6.4.4)\n",
+      "Requirement already satisfied: propcache>=0.2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (0.3.2)\n",
+      "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.20.1)\n",
+      "Requirement already satisfied: idna>=2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from yarl<2.0,>=1.17.0->aiohttp>=3.7->ray[client,default]) (3.10)\n",
+      "Requirement already satisfied: opentelemetry-api==1.36.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default]) (1.36.0)\n",
+      "Requirement already satisfied: opentelemetry-semantic-conventions==0.57b0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default]) (0.57b0)\n",
+      "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default]) (8.7.0)\n",
+      "Requirement already satisfied: zipp>=3.20 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default]) (3.23.0)\n",
+      "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default])\n",
+      "  Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB)\n",
+      "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default]) (4.3.8)\n",
+      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (2025.4.1)\n",
+      "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (0.36.2)\n",
+      "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (0.25.1)\n",
+      "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default])\n",
+      "  Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB)\n",
+      "Requirement already satisfied: six~=1.16 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from opencensus->ray[client,default]) (1.17.0)\n",
+      "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default])\n",
+      "  Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)\n",
+      "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default])\n",
+      "  Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)\n",
+      "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default])\n",
+      "  Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)\n",
+      "Requirement already satisfied: google-auth<3.0.0,>=2.14.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (2.40.3)\n",
+      "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (5.5.2)\n",
+      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (0.4.2)\n",
+      "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (4.9.1)\n",
+      "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (3.4.2)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (2.4.0)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (2025.4.26)\n",
+      "Requirement already satisfied: pyasn1>=0.1.3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (0.6.1)\n",
+      "Collecting wrapt (from smart_open->ray[client,default])\n",
+      "  Downloading wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (6.4 kB)\n",
+      "Downloading ray-2.48.0-cp312-cp312-manylinux2014_aarch64.whl (69.2 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.2/69.2 MB\u001b[0m \u001b[31m39.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (425 kB)\n",
+      "Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl (5.9 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (2.0 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading virtualenv-20.33.1-py3-none-any.whl (6.1 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.1/6.1 MB\u001b[0m \u001b[31m47.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n",
+      "Downloading aiohttp_cors-0.8.1-py3-none-any.whl (25 kB)\n",
+      "Downloading colorful-0.5.7-py2.py3-none-any.whl (201 kB)\n",
+      "Downloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n",
+      "Downloading google_api_core-2.25.1-py3-none-any.whl (160 kB)\n",
+      "Downloading googleapis_common_protos-1.70.0-py3-none-any.whl (294 kB)\n",
+      "Downloading proto_plus-1.26.1-py3-none-any.whl (50 kB)\n",
+      "Downloading opencensus_context-0.1.3-py2.py3-none-any.whl (5.1 kB)\n",
+      "Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl (12 kB)\n",
+      "Downloading opentelemetry_proto-1.36.0-py3-none-any.whl (72 kB)\n",
+      "Downloading smart_open-7.3.0.post1-py3-none-any.whl (61 kB)\n",
+      "Downloading wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (88 kB)\n",
+      "Installing collected packages: py-spy, opencensus-context, distlib, colorful, wrapt, virtualenv, proto-plus, opentelemetry-proto, msgpack, grpcio, googleapis-common-protos, smart_open, google-api-core, aiohttp_cors, ray, opencensus, opentelemetry-exporter-prometheus\n",
+      "\u001b[33m  WARNING: The script virtualenv is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n",
+      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
+      "\u001b[33m  WARNING: The scripts ray, serve and tune are installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n",
+      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17/17\u001b[0m [opentelemetry-exporter-prometheus]\n",
+      "\u001b[1A\u001b[2KSuccessfully installed aiohttp_cors-0.8.1 colorful-0.5.7 distlib-0.4.0 google-api-core-2.25.1 googleapis-common-protos-1.70.0 grpcio-1.74.0 msgpack-1.1.1 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-exporter-prometheus-0.57b0 opentelemetry-proto-1.36.0 proto-plus-1.26.1 py-spy-0.4.1 ray-2.48.0 smart_open-7.3.0.post1 virtualenv-20.33.1 wrapt-1.17.2\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install \"ray[client,default]\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "b137c8a2-4d77-4ef5-b6d0-8bfd07662c51",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-08-08 11:10:51,416\tINFO worker.py:1606 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n",
+      "2025-08-08 11:10:51,434\tINFO client_builder.py:242 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n",
+      "SIGTERM handler is not set because current thread is not the main thread.\n",
+      "2025-08-08 11:10:53,738\tWARNING utils.py:1280 -- Python patch version mismatch: The cluster was started with:\n",
+      "    Ray: 2.48.0\n",
+      "    Python: 3.12.9\n",
+      "This process on Ray Client was started with:\n",
+      "    Ray: 2.48.0\n",
+      "    Python: 3.12.10\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "cb1e38749b8a4ea49b2c2bd4e204e338",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/html": [
+       "<div class=\"lm-Widget p-Widget lm-Panel p-Panel jp-Cell-outputWrapper\">\n",
+       "    <div style=\"margin-left: 50px;display: flex;flex-direction: row;align-items: center\">\n",
+       "        <div class=\"jp-RenderedHTMLCommon\" style=\"display: flex; flex-direction: row;\">\n",
+       "  <svg viewBox=\"0 0 567 224\" fill=\"none\" xmlns=\"http://www.w3.org/2000/svg\" style=\"height: 3em;\">\n",
+       "    <g clip-path=\"url(#clip0_4338_178347)\">\n",
+       "        <path d=\"M341.29 165.561H355.29L330.13 129.051C345.63 123.991 354.21 112.051 354.21 94.2307C354.21 71.3707 338.72 58.1807 311.88 58.1807H271V165.561H283.27V131.661H311.8C314.25 131.661 316.71 131.501 319.01 131.351L341.25 165.561H341.29ZM283.29 119.851V70.0007H311.82C331.3 70.0007 342.34 78.2907 342.34 94.5507C342.34 111.271 331.34 119.861 311.82 119.861L283.29 119.851ZM451.4 138.411L463.4 165.561H476.74L428.74 58.1807H416L367.83 165.561H380.83L392.83 138.411H451.4ZM446.19 126.601H398L422 72.1407L446.24 126.601H446.19ZM526.11 128.741L566.91 58.1807H554.35L519.99 114.181L485.17 58.1807H472.44L514.01 129.181V165.541H526.13V128.741H526.11Z\" fill=\"var(--jp-ui-font-color0)\"/>\n",
+       "        <path d=\"M82.35 104.44C84.0187 97.8827 87.8248 92.0678 93.1671 87.9146C98.5094 83.7614 105.083 81.5067 111.85 81.5067C118.617 81.5067 125.191 83.7614 130.533 87.9146C135.875 92.0678 139.681 97.8827 141.35 104.44H163.75C164.476 101.562 165.622 98.8057 167.15 96.2605L127.45 56.5605C121.071 60.3522 113.526 61.6823 106.235 60.3005C98.9443 58.9187 92.4094 54.9203 87.8602 49.0574C83.3109 43.1946 81.0609 35.8714 81.5332 28.4656C82.0056 21.0599 85.1679 14.0819 90.4252 8.8446C95.6824 3.60726 102.672 0.471508 110.08 0.0272655C117.487 -0.416977 124.802 1.86091 130.647 6.4324C136.493 11.0039 140.467 17.5539 141.821 24.8501C143.175 32.1463 141.816 39.6859 138 46.0505L177.69 85.7505C182.31 82.9877 187.58 81.4995 192.962 81.4375C198.345 81.3755 203.648 82.742 208.33 85.3976C213.012 88.0532 216.907 91.9029 219.616 96.5544C222.326 101.206 223.753 106.492 223.753 111.875C223.753 117.258 222.326 122.545 219.616 127.197C216.907 131.848 213.012 135.698 208.33 138.353C203.648 141.009 198.345 142.375 192.962 142.313C187.58 142.251 182.31 140.763 177.69 138L138 177.7C141.808 184.071 143.155 191.614 141.79 198.91C140.424 206.205 136.44 212.75 130.585 217.313C124.731 221.875 117.412 224.141 110.004 223.683C102.596 223.226 95.6103 220.077 90.3621 214.828C85.1139 209.58 81.9647 202.595 81.5072 195.187C81.0497 187.779 83.3154 180.459 87.878 174.605C92.4405 168.751 98.9853 164.766 106.281 163.401C113.576 162.035 121.119 163.383 127.49 167.19L167.19 127.49C165.664 124.941 164.518 122.182 163.79 119.3H141.39C139.721 125.858 135.915 131.673 130.573 135.826C125.231 139.98 118.657 142.234 111.89 142.234C105.123 142.234 98.5494 139.98 93.2071 135.826C87.8648 131.673 84.0587 125.858 82.39 119.3H60C58.1878 126.495 53.8086 132.78 47.6863 136.971C41.5641 141.163 34.1211 142.972 26.7579 142.059C19.3947 141.146 12.6191 137.574 7.70605 132.014C2.79302 126.454 0.0813599 119.29 0.0813599 111.87C0.0813599 104.451 2.79302 97.2871 7.70605 91.7272C12.6191 86.1673 19.3947 82.5947 26.7579 
81.6817C34.1211 80.7686 41.5641 82.5781 47.6863 86.7696C53.8086 90.9611 58.1878 97.2456 60 104.44H82.35ZM100.86 204.32C103.407 206.868 106.759 208.453 110.345 208.806C113.93 209.159 117.527 208.258 120.522 206.256C123.517 204.254 125.725 201.276 126.771 197.828C127.816 194.38 127.633 190.677 126.253 187.349C124.874 184.021 122.383 181.274 119.205 179.577C116.027 177.88 112.359 177.337 108.826 178.042C105.293 178.746 102.113 180.654 99.8291 183.44C97.5451 186.226 96.2979 189.718 96.3 193.32C96.2985 195.364 96.7006 197.388 97.4831 199.275C98.2656 201.163 99.4132 202.877 100.86 204.32ZM204.32 122.88C206.868 120.333 208.453 116.981 208.806 113.396C209.159 109.811 208.258 106.214 206.256 103.219C204.254 100.223 201.275 98.0151 197.827 96.97C194.38 95.9249 190.676 96.1077 187.348 97.4873C184.02 98.8669 181.274 101.358 179.577 104.536C177.879 107.714 177.337 111.382 178.041 114.915C178.746 118.448 180.653 121.627 183.439 123.911C186.226 126.195 189.717 127.443 193.32 127.44C195.364 127.443 197.388 127.042 199.275 126.259C201.163 125.476 202.878 124.328 204.32 122.88ZM122.88 19.4205C120.333 16.8729 116.981 15.2876 113.395 14.9347C109.81 14.5817 106.213 15.483 103.218 17.4849C100.223 19.4868 98.0146 22.4654 96.9696 25.9131C95.9245 29.3608 96.1073 33.0642 97.4869 36.3922C98.8665 39.7202 101.358 42.4668 104.535 44.1639C107.713 45.861 111.381 46.4036 114.914 45.6992C118.447 44.9949 121.627 43.0871 123.911 40.301C126.195 37.515 127.442 34.0231 127.44 30.4205C127.44 28.3772 127.038 26.3539 126.255 24.4664C125.473 22.5788 124.326 20.8642 122.88 19.4205ZM19.42 100.86C16.8725 103.408 15.2872 106.76 14.9342 110.345C14.5813 113.93 15.4826 117.527 17.4844 120.522C19.4863 123.518 22.4649 125.726 25.9127 126.771C29.3604 127.816 33.0638 127.633 36.3918 126.254C39.7198 124.874 42.4664 122.383 44.1635 119.205C45.8606 116.027 46.4032 112.359 45.6988 108.826C44.9944 105.293 43.0866 102.114 40.3006 99.8296C37.5145 97.5455 34.0227 96.2983 30.42 96.3005C26.2938 96.3018 22.337 97.9421 19.42 
100.86ZM100.86 100.86C98.3125 103.408 96.7272 106.76 96.3742 110.345C96.0213 113.93 96.9226 117.527 98.9244 120.522C100.926 123.518 103.905 125.726 107.353 126.771C110.8 127.816 114.504 127.633 117.832 126.254C121.16 124.874 123.906 122.383 125.604 119.205C127.301 116.027 127.843 112.359 127.139 108.826C126.434 105.293 124.527 102.114 121.741 99.8296C118.955 97.5455 115.463 96.2983 111.86 96.3005C109.817 96.299 107.793 96.701 105.905 97.4835C104.018 98.2661 102.303 99.4136 100.86 100.86Z\" fill=\"#00AEEF\"/>\n",
+       "    </g>\n",
+       "    <defs>\n",
+       "        <clipPath id=\"clip0_4338_178347\">\n",
+       "            <rect width=\"566.93\" height=\"223.75\" fill=\"white\"/>\n",
+       "        </clipPath>\n",
+       "    </defs>\n",
+       "  </svg>\n",
+       "</div>\n",
+       "\n",
+       "        <table class=\"jp-RenderedHTMLCommon\" style=\"border-collapse: collapse;color: var(--jp-ui-font-color1);font-size: var(--jp-ui-font-size1);\">\n",
+       "    <tr>\n",
+       "        <td style=\"text-align: left\"><b>Python version:</b></td>\n",
+       "        <td style=\"text-align: left\"><b>3.12.9</b></td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td style=\"text-align: left\"><b>Ray version:</b></td>\n",
+       "        <td style=\"text-align: left\"><b>2.48.0</b></td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "    <td style=\"text-align: left\"><b>Dashboard:</b></td>\n",
+       "    <td style=\"text-align: left\"><b><a href=\"http://10.244.0.9:8265\" target=\"_blank\">http://10.244.0.9:8265</a></b></td>\n",
+       "</tr>\n",
+       "\n",
+       "</table>\n",
+       "\n",
+       "    </div>\n",
+       "</div>\n"
+      ],
+      "text/plain": [
+       "ClientContext(dashboard_url='10.244.0.9:8265', python_version='3.12.9', ray_version='2.48.0', ray_commit='2c63f6fdd1995ad8462a8333596a11f2e57f3e05', _num_clients=1, _context_to_restore=<ray.util.client._ClientContext object at 0xffffa6b5a870>)"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import ray\n",
+    "ray.init()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "ad745c57-152a-4bb2-9d92-165dcd4a5789",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'node:10.244.0.9': 1.0, 'CPU': 2.0, 'memory': 9000000000.0, 'object_store_memory': 1823143525.0, 'node:__internal_head__': 1.0, 'node:10.244.0.10': 1.0}\n"
+     ]
+    }
+   ],
+   "source": [
+    "@ray.remote\n",
+    "def cluster_resources():\n",
+    "  return ray.cluster_resources()\n",
+    "\n",
+    "print(ray.get(cluster_resources.remote()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "41b2fb19-69df-42e4-b5b7-f956ecf37bba",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'vllm'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mModuleNotFoundError\u001b[39m                       Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;66;03m# here is the reference https://docs.ray.io/en/latest/serve/llm/serving-llms.html#deployment-through-llmrouter\u001b[39;00m\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m serve\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMConfig, build_openai_app\n\u001b[32m      6\u001b[39m llm_config = LLMConfig(\n\u001b[32m      7\u001b[39m     model_loading_config=\u001b[38;5;28mdict\u001b[39m(\n\u001b[32m      8\u001b[39m         model_id=\u001b[33m\"\u001b[39m\u001b[33mqwen-0.5b\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m     20\u001b[39m     ),\n\u001b[32m     21\u001b[39m )\n\u001b[32m     23\u001b[39m app = build_openai_app({\u001b[33m\"\u001b[39m\u001b[33mllm_configs\u001b[39m\u001b[33m\"\u001b[39m: [llm_config]})\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/serve/llm/__init__.py:10\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING, Optional\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mconfigs\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserver_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m      4\u001b[39m     CloudMirrorConfig \u001b[38;5;28;01mas\u001b[39;00m _CloudMirrorConfig,\n\u001b[32m      5\u001b[39m     LLMConfig \u001b[38;5;28;01mas\u001b[39;00m _LLMConfig,\n\u001b[32m   (...)\u001b[39m\u001b[32m      8\u001b[39m     ModelLoadingConfig \u001b[38;5;28;01mas\u001b[39;00m _ModelLoadingConfig,\n\u001b[32m      9\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_server\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     11\u001b[39m     LLMServer \u001b[38;5;28;01mas\u001b[39;00m _LLMServer,\n\u001b[32m     12\u001b[39m )\n\u001b[32m     13\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrouters\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrouter\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     14\u001b[39m     LLMRouter \u001b[38;5;28;01mas\u001b[39;00m _LLMRouter,\n\u001b[32m     15\u001b[39m )\n\u001b[32m     16\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutil\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mannotations\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PublicAPI\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/llm_server.py:52\u001b[39m\n\u001b[32m     48\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMEngine\n\u001b[32m     49\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmultiplex\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mlora_model_loader\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     50\u001b[39m     LoraModelLoader,\n\u001b[32m     51\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m52\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_engine\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m VLLMEngine\n\u001b[32m     53\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     54\u001b[39m     VLLMEmbeddingRequest,\n\u001b[32m     55\u001b[39m )\n\u001b[32m     56\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbatcher\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAIResponseBatcher\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/vllm/vllm_engine.py:38\u001b[39m\n\u001b[32m     32\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMEngine\n\u001b[32m     33\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_engine_stats\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     34\u001b[39m     ArgUsage,\n\u001b[32m     35\u001b[39m     VLLMEngineStatTracker,\n\u001b[32m     36\u001b[39m     usage_counters,\n\u001b[32m     37\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m38\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     39\u001b[39m     KV_TRANSFER_PARAMS_KEY,\n\u001b[32m     40\u001b[39m     VLLMEmbeddingRequest,\n\u001b[32m     41\u001b[39m     VLLMEngineConfig,\n\u001b[32m     42\u001b[39m     VLLMGenerationRequest,\n\u001b[32m     43\u001b[39m     VLLMSamplingParams,\n\u001b[32m     44\u001b[39m )\n\u001b[32m     45\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mnode_initialization_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     46\u001b[39m     InitializeNodeOutput,\n\u001b[32m     47\u001b[39m     initialize_node,\n\u001b[32m     48\u001b[39m )\n\u001b[32m     49\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserver_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m floats_to_base64\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/vllm/vllm_models.py:6\u001b[39m\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union\n\u001b[32m      5\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpydantic\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ConfigDict, Field, ValidationError, field_validator\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mengine\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01marg_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AsyncEngineArgs\n\u001b[32m      8\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbase_pydantic\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m BaseModelExtended\n\u001b[32m      9\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcloud_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m CloudMirrorConfig\n",
+      "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'vllm'"
+     ]
+    }
+   ],
+   "source": [
+    "# here is the reference https://docs.ray.io/en/latest/serve/llm/serving-llms.html#deployment-through-llmrouter\n",
+    "\n",
+    "from ray import serve\n",
+    "from ray.serve.llm import LLMConfig, build_openai_app\n",
+    "\n",
+    "llm_config = LLMConfig(\n",
+    "    model_loading_config=dict(\n",
+    "        model_id=\"qwen-0.5b\",\n",
+    "        model_source=\"Qwen/Qwen2.5-0.5B-Instruct\",\n",
+    "    ),\n",
+    "    deployment_config=dict(\n",
+    "        autoscaling_config=dict(\n",
+    "            min_replicas=1, max_replicas=2,\n",
+    "        )\n",
+    "    ),\n",
+    "    # The accelerator_type and tensor_parallel_size are removed\n",
+    "    # or commented out as they are not applicable to the M1 chip\n",
+    "    engine_kwargs=dict(\n",
+    "        # tensor_parallel_size=2, # Removed for single-device inference\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "app = build_openai_app({\"llm_configs\": [llm_config]})\n",
+    "serve.run(app, blocking=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 8c2762d2ee3b2cee49ad612bc17094310a599a18 Mon Sep 17 00:00:00 2001
From: Dima Drogovoz <drogowoz@gmail.com>
Date: Wed, 13 Aug 2025 16:26:55 +0100
Subject: [PATCH 3/6] update ray.ipynb

---
 .../helm-chart/ai-starter-kit/files/ray.ipynb | 541 ++++++++++++------
 1 file changed, 360 insertions(+), 181 deletions(-)

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb
index daeb6c88..ef1ea066 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb
@@ -1,100 +1,271 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "1c5f9954-1e94-45f4-a4a0-a02d05bd268f",
+   "metadata": {},
+   "source": [
+    "# MLFlow and Ray example\n",
+    "\n",
+    "In this notebook we will train an ML model and deploy it to a Ray cluster."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a4f320db-fb65-40b3-89b1-789711133e3d",
+   "metadata": {},
+   "source": [
+    "## MLFlow experiment tracking\n",
+    "\n",
+    "Here we train the ML model and log its parameters and metrics to the MLFlow tracking server."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "8dd42829-7ccc-4d00-9d5c-b206c93d6586",
+   "id": "d17448f3-dc2f-4fa7-8c6c-58ec4e0f8f02",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025/08/12 15:00:18 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_Prediction_PyTorch' does not exist. Creating a new experiment.\n",
+      "2025/08/12 15:00:18 WARNING mlflow.tracking.context.registry: Encountered unexpected error during resolving tags: 'getpwuid(): uid not found: 1001'\n",
+      "2025/08/12 15:00:18 WARNING mlflow.utils.git_utils: Failed to import Git (the Git executable is probably not on your PATH), so Git SHA is not available. Error: Failed to initialize: Bad git executable.\n",
+      "The git executable must be specified in one of the following ways:\n",
+      "    - be included in your $PATH\n",
+      "    - be set via $GIT_PYTHON_GIT_EXECUTABLE\n",
+      "    - explicitly set via git.refresh(<full-path-to-git-executable>)\n",
+      "\n",
+      "All git commands will error until this is rectified.\n",
+      "\n",
+      "This initial message can be silenced or aggravated in the future by setting the\n",
+      "$GIT_PYTHON_REFRESH environment variable. Use one of the following values:\n",
+      "    - quiet|q|silence|s|silent|none|n|0: for no message or exception\n",
+      "    - warn|w|warning|log|l|1: for a warning message (logging level CRITICAL, displayed by default)\n",
+      "    - error|e|exception|raise|r|2: for a raised exception\n",
+      "\n",
+      "Example:\n",
+      "    export GIT_PYTHON_REFRESH=quiet\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "🏃 View run stately-kite-741 at: http://ai-starter-kit-mlflow:5000/#/experiments/1/runs/872da23f5a5541a39c0f893adbe53466\n",
+      "🧪 View experiment at: http://ai-starter-kit-mlflow:5000/#/experiments/1\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "from sklearn.datasets import load_diabetes\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "import numpy as np\n",
+    "import mlflow\n",
+    "import mlflow.pytorch\n",
+    "\n",
+    "# -------------------\n",
+    "# Prepare Data\n",
+    "# -------------------\n",
+    "mlflow.set_tracking_uri(uri=\"http://ai-starter-kit-mlflow:5000\")\n",
+    "data = load_diabetes()\n",
+    "X = data.data\n",
+    "y = data.target.reshape(-1, 1)\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=42\n",
+    ")\n",
+    "\n",
+    "X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n",
+    "y_train_tensor = torch.tensor(y_train, dtype=torch.float32)\n",
+    "X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n",
+    "y_test_tensor = torch.tensor(y_test, dtype=torch.float32)\n",
+    "\n",
+    "# -------------------\n",
+    "# Define Model\n",
+    "# -------------------\n",
+    "class RegressionModel(nn.Module):\n",
+    "    def __init__(self, input_dim):\n",
+    "        super().__init__()\n",
+    "        self.linear = nn.Linear(input_dim, 1)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.linear(x)\n",
+    "\n",
+    "input_dim = X_train.shape[1]\n",
+    "model = RegressionModel(input_dim)\n",
+    "\n",
+    "# -------------------\n",
+    "# Training\n",
+    "# -------------------\n",
+    "epochs = 100\n",
+    "lr = 0.01\n",
+    "\n",
+    "criterion = nn.MSELoss()\n",
+    "optimizer = optim.Adam(model.parameters(), lr=lr)\n",
+    "\n",
+    "mlflow.set_experiment(\"Diabetes_Prediction_PyTorch\")\n",
+    "\n",
+    "with mlflow.start_run():\n",
+    "    mlflow.log_param(\"epochs\", epochs)\n",
+    "    mlflow.log_param(\"learning_rate\", lr)\n",
+    "    mlflow.log_param(\"optimizer\", \"Adam\")\n",
+    "    mlflow.log_param(\"loss_fn\", \"MSELoss\")\n",
+    "    mlflow.log_param(\"input_features\", input_dim)\n",
+    "\n",
+    "    for epoch in range(epochs):\n",
+    "        model.train()\n",
+    "        optimizer.zero_grad()\n",
+    "        outputs = model(X_train_tensor)\n",
+    "        loss = criterion(outputs, y_train_tensor)\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "\n",
+    "    # -------------------\n",
+    "    # Evaluation\n",
+    "    # -------------------\n",
+    "    model.eval()\n",
+    "    with torch.no_grad():\n",
+    "        preds = model(X_test_tensor)\n",
+    "        mse = criterion(preds, y_test_tensor).item()\n",
+    "        rmse = np.sqrt(mse)\n",
+    "\n",
+    "    mlflow.log_metric(\"mse\", mse)\n",
+    "    mlflow.log_metric(\"rmse\", rmse)\n",
+    "\n",
+    "    # # Log model to MLflow\n",
+    "    # mlflow.pytorch.log_model(model, \"pytorch_model\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "208f1ba2-3ea2-4487-a18d-61778d83f5ae",
+   "metadata": {},
+   "source": [
+    "## Ray deployment\n",
+    "\n",
+    "In this step we will deploy the model trained in the previous step to our Ray cluster."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d2c6009a-991a-49e7-b345-e3e6d8cea648",
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Defaulting to user installation because normal site-packages is not writeable\n",
-      "Collecting ray[client,default]\n",
+      "Collecting ray[client,default,serve]\n",
       "  Downloading ray-2.48.0-cp312-cp312-manylinux2014_aarch64.whl.metadata (19 kB)\n",
-      "Requirement already satisfied: click>=7.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (8.2.1)\n",
-      "Requirement already satisfied: filelock in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (3.18.0)\n",
-      "Requirement already satisfied: jsonschema in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (4.24.0)\n",
-      "Collecting msgpack<2.0.0,>=1.0.0 (from ray[client,default])\n",
+      "Requirement already satisfied: click>=7.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (8.2.1)\n",
+      "Requirement already satisfied: filelock in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (3.18.0)\n",
+      "Requirement already satisfied: jsonschema in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (4.24.0)\n",
+      "Collecting msgpack<2.0.0,>=1.0.0 (from ray[client,default,serve])\n",
       "  Downloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (8.4 kB)\n",
-      "Requirement already satisfied: packaging in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (25.0)\n",
-      "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (6.31.1)\n",
-      "Requirement already satisfied: pyyaml in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (6.0.2)\n",
-      "Requirement already satisfied: requests in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (2.32.3)\n",
-      "Collecting grpcio (from ray[client,default])\n",
+      "Requirement already satisfied: packaging in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (25.0)\n",
+      "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (6.31.1)\n",
+      "Requirement already satisfied: pyyaml in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (6.0.2)\n",
+      "Requirement already satisfied: requests in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (2.32.3)\n",
+      "Collecting grpcio (from ray[client,default,serve])\n",
       "  Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl.metadata (3.8 kB)\n",
-      "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (3.12.12)\n",
-      "Collecting aiohttp_cors (from ray[client,default])\n",
+      "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (3.12.12)\n",
+      "Collecting aiohttp_cors (from ray[client,default,serve])\n",
       "  Downloading aiohttp_cors-0.8.1-py3-none-any.whl.metadata (20 kB)\n",
-      "Collecting colorful (from ray[client,default])\n",
+      "Collecting colorful (from ray[client,default,serve])\n",
       "  Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)\n",
-      "Collecting py-spy>=0.4.0 (from ray[client,default])\n",
+      "Collecting py-spy>=0.4.0 (from ray[client,default,serve])\n",
       "  Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (510 bytes)\n",
-      "Collecting opencensus (from ray[client,default])\n",
+      "Collecting opencensus (from ray[client,default,serve])\n",
       "  Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)\n",
-      "Requirement already satisfied: opentelemetry-sdk>=1.30.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (1.36.0)\n",
-      "Collecting opentelemetry-exporter-prometheus (from ray[client,default])\n",
+      "Requirement already satisfied: opentelemetry-sdk>=1.30.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (1.36.0)\n",
+      "Collecting opentelemetry-exporter-prometheus (from ray[client,default,serve])\n",
       "  Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl.metadata (1.8 kB)\n",
-      "Collecting opentelemetry-proto (from ray[client,default])\n",
+      "Collecting opentelemetry-proto (from ray[client,default,serve])\n",
       "  Downloading opentelemetry_proto-1.36.0-py3-none-any.whl.metadata (2.3 kB)\n",
-      "Requirement already satisfied: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (2.11.5)\n",
-      "Requirement already satisfied: prometheus_client>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (0.22.1)\n",
-      "Collecting smart_open (from ray[client,default])\n",
+      "Requirement already satisfied: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (2.11.5)\n",
+      "Requirement already satisfied: prometheus_client>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (0.22.1)\n",
+      "Collecting smart_open (from ray[client,default,serve])\n",
       "  Downloading smart_open-7.3.0.post1-py3-none-any.whl.metadata (24 kB)\n",
-      "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default])\n",
+      "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default,serve])\n",
       "  Downloading virtualenv-20.33.1-py3-none-any.whl.metadata (4.5 kB)\n",
-      "Requirement already satisfied: annotated-types>=0.6.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (0.7.0)\n",
-      "Requirement already satisfied: pydantic-core==2.33.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (2.33.2)\n",
-      "Requirement already satisfied: typing-extensions>=4.12.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (4.14.0)\n",
-      "Requirement already satisfied: typing-inspection>=0.4.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (0.4.1)\n",
-      "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (2.6.1)\n",
-      "Requirement already satisfied: aiosignal>=1.1.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.3.2)\n",
-      "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (25.3.0)\n",
-      "Requirement already satisfied: frozenlist>=1.1.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.7.0)\n",
-      "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (6.4.4)\n",
-      "Requirement already satisfied: propcache>=0.2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (0.3.2)\n",
-      "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.20.1)\n",
-      "Requirement already satisfied: idna>=2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from yarl<2.0,>=1.17.0->aiohttp>=3.7->ray[client,default]) (3.10)\n",
-      "Requirement already satisfied: opentelemetry-api==1.36.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default]) (1.36.0)\n",
-      "Requirement already satisfied: opentelemetry-semantic-conventions==0.57b0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default]) (0.57b0)\n",
-      "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default]) (8.7.0)\n",
-      "Requirement already satisfied: zipp>=3.20 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default]) (3.23.0)\n",
-      "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default])\n",
+      "Collecting watchfiles (from ray[client,default,serve])\n",
+      "  Downloading watchfiles-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (4.9 kB)\n",
+      "Requirement already satisfied: uvicorn[standard] in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (0.35.0)\n",
+      "Requirement already satisfied: fastapi in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (0.116.1)\n",
+      "Requirement already satisfied: starlette in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (0.47.2)\n",
+      "Requirement already satisfied: annotated-types>=0.6.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default,serve]) (0.7.0)\n",
+      "Requirement already satisfied: pydantic-core==2.33.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default,serve]) (2.33.2)\n",
+      "Requirement already satisfied: typing-extensions>=4.12.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default,serve]) (4.14.0)\n",
+      "Requirement already satisfied: typing-inspection>=0.4.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default,serve]) (0.4.1)\n",
+      "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (2.6.1)\n",
+      "Requirement already satisfied: aiosignal>=1.1.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.3.2)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (25.3.0)\n",
+      "Requirement already satisfied: frozenlist>=1.1.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.7.0)\n",
+      "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (6.4.4)\n",
+      "Requirement already satisfied: propcache>=0.2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (0.3.2)\n",
+      "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.20.1)\n",
+      "Requirement already satisfied: idna>=2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from yarl<2.0,>=1.17.0->aiohttp>=3.7->ray[client,default,serve]) (3.10)\n",
+      "Requirement already satisfied: opentelemetry-api==1.36.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default,serve]) (1.36.0)\n",
+      "Requirement already satisfied: opentelemetry-semantic-conventions==0.57b0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default,serve]) (0.57b0)\n",
+      "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default,serve]) (8.7.0)\n",
+      "Requirement already satisfied: zipp>=3.20 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default,serve]) (3.23.0)\n",
+      "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve])\n",
       "  Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB)\n",
-      "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default]) (4.3.8)\n",
-      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (2025.4.1)\n",
-      "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (0.36.2)\n",
-      "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (0.25.1)\n",
-      "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default])\n",
+      "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve]) (4.3.8)\n",
+      "Requirement already satisfied: anyio<5,>=3.6.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from starlette->ray[client,default,serve]) (4.9.0)\n",
+      "Requirement already satisfied: sniffio>=1.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from anyio<5,>=3.6.2->starlette->ray[client,default,serve]) (1.3.1)\n",
+      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default,serve]) (2025.4.1)\n",
+      "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default,serve]) (0.36.2)\n",
+      "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default,serve]) (0.25.1)\n",
+      "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default,serve])\n",
       "  Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB)\n",
-      "Requirement already satisfied: six~=1.16 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from opencensus->ray[client,default]) (1.17.0)\n",
-      "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default])\n",
+      "Requirement already satisfied: six~=1.16 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from opencensus->ray[client,default,serve]) (1.17.0)\n",
+      "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default,serve])\n",
       "  Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)\n",
-      "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default])\n",
+      "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n",
       "  Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)\n",
-      "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default])\n",
+      "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n",
       "  Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)\n",
-      "Requirement already satisfied: google-auth<3.0.0,>=2.14.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (2.40.3)\n",
-      "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (5.5.2)\n",
-      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (0.4.2)\n",
-      "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (4.9.1)\n",
-      "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (3.4.2)\n",
-      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (2.4.0)\n",
-      "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (2025.4.26)\n",
-      "Requirement already satisfied: pyasn1>=0.1.3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (0.6.1)\n",
-      "Collecting wrapt (from smart_open->ray[client,default])\n",
-      "  Downloading wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (6.4 kB)\n",
+      "Requirement already satisfied: google-auth<3.0.0,>=2.14.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (2.40.3)\n",
+      "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (5.5.2)\n",
+      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (0.4.2)\n",
+      "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (4.9.1)\n",
+      "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default,serve]) (3.4.2)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default,serve]) (2.4.0)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default,serve]) (2025.4.26)\n",
+      "Requirement already satisfied: pyasn1>=0.1.3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (0.6.1)\n",
+      "Collecting wrapt (from smart_open->ray[client,default,serve])\n",
+      "  Downloading wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl.metadata (6.4 kB)\n",
+      "Requirement already satisfied: h11>=0.8 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve]) (0.16.0)\n",
+      "Collecting httptools>=0.6.3 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n",
+      "  Downloading httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (3.6 kB)\n",
+      "Collecting python-dotenv>=0.13 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n",
+      "  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)\n",
+      "Collecting uvloop>=0.15.1 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n",
+      "  Downloading uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (4.9 kB)\n",
+      "Collecting websockets>=10.4 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n",
+      "  Downloading websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (6.8 kB)\n",
       "Downloading ray-2.48.0-cp312-cp312-manylinux2014_aarch64.whl (69.2 MB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.2/69.2 MB\u001b[0m \u001b[31m39.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hDownloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (425 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.2/69.2 MB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "Downloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (425 kB)\n",
       "Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl (5.9 MB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m38.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
       "\u001b[?25hDownloading py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (2.0 MB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m37.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
       "\u001b[?25hDownloading virtualenv-20.33.1-py3-none-any.whl (6.1 MB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.1/6.1 MB\u001b[0m \u001b[31m47.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hDownloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.1/6.1 MB\u001b[0m \u001b[31m40.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "Downloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n",
       "Downloading aiohttp_cors-0.8.1-py3-none-any.whl (25 kB)\n",
       "Downloading colorful-0.5.7-py2.py3-none-any.whl (201 kB)\n",
       "Downloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n",
@@ -105,169 +276,177 @@
       "Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl (12 kB)\n",
       "Downloading opentelemetry_proto-1.36.0-py3-none-any.whl (72 kB)\n",
       "Downloading smart_open-7.3.0.post1-py3-none-any.whl (61 kB)\n",
-      "Downloading wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (88 kB)\n",
-      "Installing collected packages: py-spy, opencensus-context, distlib, colorful, wrapt, virtualenv, proto-plus, opentelemetry-proto, msgpack, grpcio, googleapis-common-protos, smart_open, google-api-core, aiohttp_cors, ray, opencensus, opentelemetry-exporter-prometheus\n",
+      "Downloading httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (508 kB)\n",
+      "Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)\n",
+      "Downloading uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (4.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m32.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading watchfiles-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (450 kB)\n",
+      "Downloading websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (183 kB)\n",
+      "Downloading wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl (88 kB)\n",
+      "Installing collected packages: py-spy, opencensus-context, distlib, colorful, wrapt, websockets, virtualenv, uvloop, python-dotenv, proto-plus, opentelemetry-proto, msgpack, httptools, grpcio, googleapis-common-protos, watchfiles, smart_open, google-api-core, aiohttp_cors, ray, opencensus, opentelemetry-exporter-prometheus\n",
+      "\u001b[33m  WARNING: The script websockets is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n",
+      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
       "\u001b[33m  WARNING: The script virtualenv is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n",
       "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
+      "\u001b[33m  WARNING: The script dotenv is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n",
+      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
+      "\u001b[33m  WARNING: The script watchfiles is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n",
+      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
       "\u001b[33m  WARNING: The scripts ray, serve and tune are installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n",
       "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17/17\u001b[0m [opentelemetry-exporter-prometheus]\n",
-      "\u001b[1A\u001b[2KSuccessfully installed aiohttp_cors-0.8.1 colorful-0.5.7 distlib-0.4.0 google-api-core-2.25.1 googleapis-common-protos-1.70.0 grpcio-1.74.0 msgpack-1.1.1 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-exporter-prometheus-0.57b0 opentelemetry-proto-1.36.0 proto-plus-1.26.1 py-spy-0.4.1 ray-2.48.0 smart_open-7.3.0.post1 virtualenv-20.33.1 wrapt-1.17.2\n"
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m22/22\u001b[0m [opentelemetry-exporter-prometheus]\n",
+      "\u001b[1A\u001b[2KSuccessfully installed aiohttp_cors-0.8.1 colorful-0.5.7 distlib-0.4.0 google-api-core-2.25.1 googleapis-common-protos-1.70.0 grpcio-1.74.0 httptools-0.6.4 msgpack-1.1.1 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-exporter-prometheus-0.57b0 opentelemetry-proto-1.36.0 proto-plus-1.26.1 py-spy-0.4.1 python-dotenv-1.1.1 ray-2.48.0 smart_open-7.3.0.post1 uvloop-0.21.0 virtualenv-20.33.1 watchfiles-1.1.0 websockets-15.0.1 wrapt-1.17.3\n"
      ]
     }
    ],
    "source": [
-    "!pip install \"ray[client,default]\""
+    "!pip install \"ray[serve,client,default]\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "b137c8a2-4d77-4ef5-b6d0-8bfd07662c51",
+   "execution_count": 3,
+   "id": "5be0b7fa-0815-4224-a4e6-ad5b74647e20",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2025-08-08 11:10:51,416\tINFO worker.py:1606 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n",
-      "2025-08-08 11:10:51,434\tINFO client_builder.py:242 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n",
+      "2025-08-12 15:00:37,849\tINFO worker.py:1606 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n",
+      "2025-08-12 15:00:37,850\tINFO client_builder.py:242 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n",
       "SIGTERM handler is not set because current thread is not the main thread.\n",
-      "2025-08-08 11:10:53,738\tWARNING utils.py:1280 -- Python patch version mismatch: The cluster was started with:\n",
+      "2025-08-12 15:00:39,818\tWARNING utils.py:1280 -- Python patch version mismatch: The cluster was started with:\n",
       "    Ray: 2.48.0\n",
       "    Python: 3.12.9\n",
       "This process on Ray Client was started with:\n",
       "    Ray: 2.48.0\n",
       "    Python: 3.12.10\n",
-      "\n"
+      "\n",
+      "\u001b[36m(ProxyActor pid=3272)\u001b[0m INFO 2025-08-12 08:00:44,231 proxy 10.244.0.9 -- Proxy starting on node 66724a5e2332cd618965646d0b7ab0d4d89622990053ab677dc8f588 (HTTP port: 8000).\n",
+      "\u001b[36m(ProxyActor pid=3272)\u001b[0m INFO 2025-08-12 08:00:44,333 proxy 10.244.0.9 -- Got updated endpoints: {}.\n",
+      "INFO 2025-08-12 15:00:44,830 serve 72 -- Started Serve in namespace \"serve\".\n",
+      "\u001b[36m(ServeController pid=3174)\u001b[0m INFO 2025-08-12 08:00:46,387 controller 3174 -- Deploying new version of Deployment(name='PyTorchMLflowDeployment', app='default') (initial target replicas: 1).\n",
+      "\u001b[36m(ProxyActor pid=3272)\u001b[0m INFO 2025-08-12 08:00:46,396 proxy 10.244.0.9 -- Got updated endpoints: {Deployment(name='PyTorchMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n",
+      "\u001b[36m(ProxyActor pid=3272)\u001b[0m INFO 2025-08-12 08:00:46,423 proxy 10.244.0.9 -- Started <ray.serve._private.router.SharedRouterLongPollClient object at 0xffff4c01e4e0>.\n",
+      "\u001b[36m(ServeController pid=3174)\u001b[0m INFO 2025-08-12 08:00:46,503 controller 3174 -- Adding 1 replica to Deployment(name='PyTorchMLflowDeployment', app='default').\n"
      ]
     },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cb1e38749b8a4ea49b2c2bd4e204e338",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/html": [
-       "<div class=\"lm-Widget p-Widget lm-Panel p-Panel jp-Cell-outputWrapper\">\n",
-       "    <div style=\"margin-left: 50px;display: flex;flex-direction: row;align-items: center\">\n",
-       "        <div class=\"jp-RenderedHTMLCommon\" style=\"display: flex; flex-direction: row;\">\n",
-       "  <svg viewBox=\"0 0 567 224\" fill=\"none\" xmlns=\"http://www.w3.org/2000/svg\" style=\"height: 3em;\">\n",
-       "    <g clip-path=\"url(#clip0_4338_178347)\">\n",
-       "        <path d=\"M341.29 165.561H355.29L330.13 129.051C345.63 123.991 354.21 112.051 354.21 94.2307C354.21 71.3707 338.72 58.1807 311.88 58.1807H271V165.561H283.27V131.661H311.8C314.25 131.661 316.71 131.501 319.01 131.351L341.25 165.561H341.29ZM283.29 119.851V70.0007H311.82C331.3 70.0007 342.34 78.2907 342.34 94.5507C342.34 111.271 331.34 119.861 311.82 119.861L283.29 119.851ZM451.4 138.411L463.4 165.561H476.74L428.74 58.1807H416L367.83 165.561H380.83L392.83 138.411H451.4ZM446.19 126.601H398L422 72.1407L446.24 126.601H446.19ZM526.11 128.741L566.91 58.1807H554.35L519.99 114.181L485.17 58.1807H472.44L514.01 129.181V165.541H526.13V128.741H526.11Z\" fill=\"var(--jp-ui-font-color0)\"/>\n",
-       "        <path d=\"M82.35 104.44C84.0187 97.8827 87.8248 92.0678 93.1671 87.9146C98.5094 83.7614 105.083 81.5067 111.85 81.5067C118.617 81.5067 125.191 83.7614 130.533 87.9146C135.875 92.0678 139.681 97.8827 141.35 104.44H163.75C164.476 101.562 165.622 98.8057 167.15 96.2605L127.45 56.5605C121.071 60.3522 113.526 61.6823 106.235 60.3005C98.9443 58.9187 92.4094 54.9203 87.8602 49.0574C83.3109 43.1946 81.0609 35.8714 81.5332 28.4656C82.0056 21.0599 85.1679 14.0819 90.4252 8.8446C95.6824 3.60726 102.672 0.471508 110.08 0.0272655C117.487 -0.416977 124.802 1.86091 130.647 6.4324C136.493 11.0039 140.467 17.5539 141.821 24.8501C143.175 32.1463 141.816 39.6859 138 46.0505L177.69 85.7505C182.31 82.9877 187.58 81.4995 192.962 81.4375C198.345 81.3755 203.648 82.742 208.33 85.3976C213.012 88.0532 216.907 91.9029 219.616 96.5544C222.326 101.206 223.753 106.492 223.753 111.875C223.753 117.258 222.326 122.545 219.616 127.197C216.907 131.848 213.012 135.698 208.33 138.353C203.648 141.009 198.345 142.375 192.962 142.313C187.58 142.251 182.31 140.763 177.69 138L138 177.7C141.808 184.071 143.155 191.614 141.79 198.91C140.424 206.205 136.44 212.75 130.585 217.313C124.731 221.875 117.412 224.141 110.004 223.683C102.596 223.226 95.6103 220.077 90.3621 214.828C85.1139 209.58 81.9647 202.595 81.5072 195.187C81.0497 187.779 83.3154 180.459 87.878 174.605C92.4405 168.751 98.9853 164.766 106.281 163.401C113.576 162.035 121.119 163.383 127.49 167.19L167.19 127.49C165.664 124.941 164.518 122.182 163.79 119.3H141.39C139.721 125.858 135.915 131.673 130.573 135.826C125.231 139.98 118.657 142.234 111.89 142.234C105.123 142.234 98.5494 139.98 93.2071 135.826C87.8648 131.673 84.0587 125.858 82.39 119.3H60C58.1878 126.495 53.8086 132.78 47.6863 136.971C41.5641 141.163 34.1211 142.972 26.7579 142.059C19.3947 141.146 12.6191 137.574 7.70605 132.014C2.79302 126.454 0.0813599 119.29 0.0813599 111.87C0.0813599 104.451 2.79302 97.2871 7.70605 91.7272C12.6191 86.1673 19.3947 82.5947 26.7579 
81.6817C34.1211 80.7686 41.5641 82.5781 47.6863 86.7696C53.8086 90.9611 58.1878 97.2456 60 104.44H82.35ZM100.86 204.32C103.407 206.868 106.759 208.453 110.345 208.806C113.93 209.159 117.527 208.258 120.522 206.256C123.517 204.254 125.725 201.276 126.771 197.828C127.816 194.38 127.633 190.677 126.253 187.349C124.874 184.021 122.383 181.274 119.205 179.577C116.027 177.88 112.359 177.337 108.826 178.042C105.293 178.746 102.113 180.654 99.8291 183.44C97.5451 186.226 96.2979 189.718 96.3 193.32C96.2985 195.364 96.7006 197.388 97.4831 199.275C98.2656 201.163 99.4132 202.877 100.86 204.32ZM204.32 122.88C206.868 120.333 208.453 116.981 208.806 113.396C209.159 109.811 208.258 106.214 206.256 103.219C204.254 100.223 201.275 98.0151 197.827 96.97C194.38 95.9249 190.676 96.1077 187.348 97.4873C184.02 98.8669 181.274 101.358 179.577 104.536C177.879 107.714 177.337 111.382 178.041 114.915C178.746 118.448 180.653 121.627 183.439 123.911C186.226 126.195 189.717 127.443 193.32 127.44C195.364 127.443 197.388 127.042 199.275 126.259C201.163 125.476 202.878 124.328 204.32 122.88ZM122.88 19.4205C120.333 16.8729 116.981 15.2876 113.395 14.9347C109.81 14.5817 106.213 15.483 103.218 17.4849C100.223 19.4868 98.0146 22.4654 96.9696 25.9131C95.9245 29.3608 96.1073 33.0642 97.4869 36.3922C98.8665 39.7202 101.358 42.4668 104.535 44.1639C107.713 45.861 111.381 46.4036 114.914 45.6992C118.447 44.9949 121.627 43.0871 123.911 40.301C126.195 37.515 127.442 34.0231 127.44 30.4205C127.44 28.3772 127.038 26.3539 126.255 24.4664C125.473 22.5788 124.326 20.8642 122.88 19.4205ZM19.42 100.86C16.8725 103.408 15.2872 106.76 14.9342 110.345C14.5813 113.93 15.4826 117.527 17.4844 120.522C19.4863 123.518 22.4649 125.726 25.9127 126.771C29.3604 127.816 33.0638 127.633 36.3918 126.254C39.7198 124.874 42.4664 122.383 44.1635 119.205C45.8606 116.027 46.4032 112.359 45.6988 108.826C44.9944 105.293 43.0866 102.114 40.3006 99.8296C37.5145 97.5455 34.0227 96.2983 30.42 96.3005C26.2938 96.3018 22.337 97.9421 19.42 
100.86ZM100.86 100.86C98.3125 103.408 96.7272 106.76 96.3742 110.345C96.0213 113.93 96.9226 117.527 98.9244 120.522C100.926 123.518 103.905 125.726 107.353 126.771C110.8 127.816 114.504 127.633 117.832 126.254C121.16 124.874 123.906 122.383 125.604 119.205C127.301 116.027 127.843 112.359 127.139 108.826C126.434 105.293 124.527 102.114 121.741 99.8296C118.955 97.5455 115.463 96.2983 111.86 96.3005C109.817 96.299 107.793 96.701 105.905 97.4835C104.018 98.2661 102.303 99.4136 100.86 100.86Z\" fill=\"#00AEEF\"/>\n",
-       "    </g>\n",
-       "    <defs>\n",
-       "        <clipPath id=\"clip0_4338_178347\">\n",
-       "            <rect width=\"566.93\" height=\"223.75\" fill=\"white\"/>\n",
-       "        </clipPath>\n",
-       "    </defs>\n",
-       "  </svg>\n",
-       "</div>\n",
-       "\n",
-       "        <table class=\"jp-RenderedHTMLCommon\" style=\"border-collapse: collapse;color: var(--jp-ui-font-color1);font-size: var(--jp-ui-font-size1);\">\n",
-       "    <tr>\n",
-       "        <td style=\"text-align: left\"><b>Python version:</b></td>\n",
-       "        <td style=\"text-align: left\"><b>3.12.9</b></td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td style=\"text-align: left\"><b>Ray version:</b></td>\n",
-       "        <td style=\"text-align: left\"><b>2.48.0</b></td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "    <td style=\"text-align: left\"><b>Dashboard:</b></td>\n",
-       "    <td style=\"text-align: left\"><b><a href=\"http://10.244.0.9:8265\" target=\"_blank\">http://10.244.0.9:8265</a></b></td>\n",
-       "</tr>\n",
-       "\n",
-       "</table>\n",
-       "\n",
-       "    </div>\n",
-       "</div>\n"
-      ],
-      "text/plain": [
-       "ClientContext(dashboard_url='10.244.0.9:8265', python_version='3.12.9', ray_version='2.48.0', ray_commit='2c63f6fdd1995ad8462a8333596a11f2e57f3e05', _num_clients=1, _context_to_restore=<ray.util.client._ClientContext object at 0xffffa6b5a870>)"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import ray\n",
-    "ray.init()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "ad745c57-152a-4bb2-9d92-165dcd4a5789",
-   "metadata": {},
-   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'node:10.244.0.9': 1.0, 'CPU': 2.0, 'memory': 9000000000.0, 'object_store_memory': 1823143525.0, 'node:__internal_head__': 1.0, 'node:10.244.0.10': 1.0}\n"
+      "\u001b[36m(ServeReplica:default:PyTorchMLflowDeployment pid=1940, ip=10.244.0.10)\u001b[0m Loading model from MLflow...\n",
+      "\u001b[36m(ServeReplica:default:PyTorchMLflowDeployment pid=1940, ip=10.244.0.10)\u001b[0m Model loaded successfully.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO 2025-08-12 15:00:50,456 serve 72 -- Application 'default' is ready at http://127.0.0.1:8000/predict.\n",
+      "INFO 2025-08-12 15:00:50,519 serve 72 -- Started <ray.serve._private.router.SharedRouterLongPollClient object at 0xffff704638f0>.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "DeploymentHandle(deployment='PyTorchMLflowDeployment')"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING 2025-08-12 15:00:50,695 serve 72 -- Failed to get queue length from Replica(id='7iflc5wc', deployment='PyTorchMLflowDeployment', app='default') within 0.1s. If this happens repeatedly it's likely caused by high network latency in the cluster. You can configure the deadline using the `RAY_SERVE_QUEUE_LENGTH_RESPONSE_DEADLINE_S` environment variable.\n",
+      "2025-08-12 15:00:50,725\tERROR dataclient.py:312 -- Callback error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages/ray/util/client/dataclient.py\", line 301, in _process_response\n",
+      "    can_remove = callback(response)\n",
+      "                 ^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages/ray/util/client/dataclient.py\", line 179, in __call__\n",
+      "    self.callback(self.data)\n",
+      "  File \"/opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages/ray/util/client/common.py\", line 179, in deserialize_obj\n",
+      "    py_callback(data)\n",
+      "  File \"/opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages/ray/util/client/common.py\", line 147, in set_future\n",
+      "    fut.set_result(data)\n",
+      "  File \"/opt/bitnami/miniforge/lib/python3.12/concurrent/futures/_base.py\", line 544, in set_result\n",
+      "    raise InvalidStateError('{}: {!r}'.format(self._state, self))\n",
+      "concurrent.futures._base.InvalidStateError: CANCELLED: <Future at 0xffff7065c740 state=cancelled>\n",
+      "\u001b[36m(ProxyActor pid=1993, ip=10.244.0.10)\u001b[0m INFO 2025-08-12 08:00:51,185 proxy 10.244.0.10 -- Proxy starting on node 1d5e87519ede5427735866024d720fbcd019925255178b592dd245a3 (HTTP port: 8000).\n",
+      "\u001b[36m(ProxyActor pid=1993, ip=10.244.0.10)\u001b[0m INFO 2025-08-12 08:00:51,223 proxy 10.244.0.10 -- Got updated endpoints: {Deployment(name='PyTorchMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n",
+      "\u001b[36m(ProxyActor pid=1993, ip=10.244.0.10)\u001b[0m INFO 2025-08-12 08:00:51,230 proxy 10.244.0.10 -- Started <ray.serve._private.router.SharedRouterLongPollClient object at 0xffff782785c0>.\n",
+      "\u001b[36m(ServeReplica:default:PyTorchMLflowDeployment pid=1940, ip=10.244.0.10)\u001b[0m INFO 2025-08-12 08:01:29,315 default_PyTorchMLflowDeployment 7iflc5wc 4d7cb9a3-7b88-40e3-ab03-d933717e195a -- POST /predict 200 40.5ms\n",
+      "\u001b[36m(ServeController pid=3174)\u001b[0m INFO 2025-08-12 08:01:38,376 controller 3174 -- Removing 1 replica from Deployment(name='PyTorchMLflowDeployment', app='default').\n",
+      "\u001b[36m(ServeController pid=3174)\u001b[0m INFO 2025-08-12 08:01:40,427 controller 3174 -- Replica(id='7iflc5wc', deployment='PyTorchMLflowDeployment', app='default') is stopped.\n"
      ]
     }
    ],
    "source": [
-    "@ray.remote\n",
-    "def cluster_resources():\n",
-    "  return ray.cluster_resources()\n",
+    "import torch\n",
+    "import mlflow.pytorch\n",
+    "import numpy as np\n",
+    "from starlette.requests import Request\n",
+    "from typing import Dict\n",
     "\n",
-    "print(ray.get(cluster_resources.remote()))"
+    "from ray import serve\n",
+    "import ray\n",
+    "\n",
+    "# ray.init(\"ray://ai-starter-kit-kuberay-head-svc:10001\", namespace=\"my_new_namespace\")\n",
+    "\n",
+    "# MLFLOW_MODEL_URI = \"mlruns/0/<RUN_ID>/artifacts/pytorch_model\"  # Change to your run path\n",
+    "\n",
+    "@serve.deployment\n",
+    "class PyTorchMLflowDeployment:\n",
+    "    def __init__(self):\n",
+    "        print(\"Loading model from MLflow...\")\n",
+    "        # # self.model = mlflow.pytorch.load_model(MLFLOW_MODEL_URI)\n",
+    "        self.model = model\n",
+    "        self.model.eval()\n",
+    "        print(\"Model loaded successfully.\")\n",
+    "\n",
+    "    async def __call__(self, request: Request) -> Dict:\n",
+    "        try:\n",
+    "            data = await request.json()\n",
+    "            features = data.get(\"features\", None)\n",
+    "            if features is None:\n",
+    "                return {\"error\": \"Missing 'features' in request\"}\n",
+    "\n",
+    "            X = np.array(features).reshape(1, -1)\n",
+    "            X_tensor = torch.tensor(X, dtype=torch.float32)\n",
+    "\n",
+    "            with torch.no_grad():\n",
+    "                prediction = self.model(X_tensor).numpy().tolist()\n",
+    "\n",
+    "            return {\"prediction\": prediction}\n",
+    "        except Exception as e:\n",
+    "            return {\"error\": str(e)}\n",
+    "\n",
+    "app = PyTorchMLflowDeployment.bind()\n",
+    "serve.run(app, route_prefix=\"/predict\")\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 4,
-   "id": "41b2fb19-69df-42e4-b5b7-f956ecf37bba",
+   "id": "1e29a0ae-7514-4881-ae81-99b4e01e4e1f",
    "metadata": {},
    "outputs": [
     {
-     "ename": "ModuleNotFoundError",
-     "evalue": "No module named 'vllm'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mModuleNotFoundError\u001b[39m                       Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;66;03m# here is the reference https://docs.ray.io/en/latest/serve/llm/serving-llms.html#deployment-through-llmrouter\u001b[39;00m\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m serve\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMConfig, build_openai_app\n\u001b[32m      6\u001b[39m llm_config = LLMConfig(\n\u001b[32m      7\u001b[39m     model_loading_config=\u001b[38;5;28mdict\u001b[39m(\n\u001b[32m      8\u001b[39m         model_id=\u001b[33m\"\u001b[39m\u001b[33mqwen-0.5b\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m     20\u001b[39m     ),\n\u001b[32m     21\u001b[39m )\n\u001b[32m     23\u001b[39m app = build_openai_app({\u001b[33m\"\u001b[39m\u001b[33mllm_configs\u001b[39m\u001b[33m\"\u001b[39m: [llm_config]})\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/serve/llm/__init__.py:10\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING, Optional\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mconfigs\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserver_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m      4\u001b[39m     CloudMirrorConfig \u001b[38;5;28;01mas\u001b[39;00m _CloudMirrorConfig,\n\u001b[32m      5\u001b[39m     LLMConfig \u001b[38;5;28;01mas\u001b[39;00m _LLMConfig,\n\u001b[32m   (...)\u001b[39m\u001b[32m      8\u001b[39m     ModelLoadingConfig \u001b[38;5;28;01mas\u001b[39;00m _ModelLoadingConfig,\n\u001b[32m      9\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_server\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     11\u001b[39m     LLMServer \u001b[38;5;28;01mas\u001b[39;00m _LLMServer,\n\u001b[32m     12\u001b[39m )\n\u001b[32m     13\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrouters\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrouter\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     14\u001b[39m     LLMRouter \u001b[38;5;28;01mas\u001b[39;00m _LLMRouter,\n\u001b[32m     15\u001b[39m )\n\u001b[32m     16\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutil\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mannotations\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PublicAPI\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/llm_server.py:52\u001b[39m\n\u001b[32m     48\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMEngine\n\u001b[32m     49\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmultiplex\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mlora_model_loader\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     50\u001b[39m     LoraModelLoader,\n\u001b[32m     51\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m52\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_engine\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m VLLMEngine\n\u001b[32m     53\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     54\u001b[39m     VLLMEmbeddingRequest,\n\u001b[32m     55\u001b[39m )\n\u001b[32m     56\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbatcher\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAIResponseBatcher\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/vllm/vllm_engine.py:38\u001b[39m\n\u001b[32m     32\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMEngine\n\u001b[32m     33\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_engine_stats\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     34\u001b[39m     ArgUsage,\n\u001b[32m     35\u001b[39m     VLLMEngineStatTracker,\n\u001b[32m     36\u001b[39m     usage_counters,\n\u001b[32m     37\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m38\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     39\u001b[39m     KV_TRANSFER_PARAMS_KEY,\n\u001b[32m     40\u001b[39m     VLLMEmbeddingRequest,\n\u001b[32m     41\u001b[39m     VLLMEngineConfig,\n\u001b[32m     42\u001b[39m     VLLMGenerationRequest,\n\u001b[32m     43\u001b[39m     VLLMSamplingParams,\n\u001b[32m     44\u001b[39m )\n\u001b[32m     45\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mnode_initialization_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m     46\u001b[39m     InitializeNodeOutput,\n\u001b[32m     47\u001b[39m     initialize_node,\n\u001b[32m     48\u001b[39m )\n\u001b[32m     49\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserver_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m floats_to_base64\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/vllm/vllm_models.py:6\u001b[39m\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union\n\u001b[32m      5\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpydantic\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ConfigDict, Field, ValidationError, field_validator\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mengine\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01marg_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AsyncEngineArgs\n\u001b[32m      8\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbase_pydantic\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m BaseModelExtended\n\u001b[32m      9\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcloud_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m CloudMirrorConfig\n",
-      "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'vllm'"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO 2025-08-12 15:01:38,215 serve 72 -- Deleting app ['PyTorchMLflowDeployment']\n"
      ]
     }
    ],
    "source": [
-    "# here is the reference https://docs.ray.io/en/latest/serve/llm/serving-llms.html#deployment-through-llmrouter\n",
-    "\n",
-    "from ray import serve\n",
-    "from ray.serve.llm import LLMConfig, build_openai_app\n",
-    "\n",
-    "llm_config = LLMConfig(\n",
-    "    model_loading_config=dict(\n",
-    "        model_id=\"qwen-0.5b\",\n",
-    "        model_source=\"Qwen/Qwen2.5-0.5B-Instruct\",\n",
-    "    ),\n",
-    "    deployment_config=dict(\n",
-    "        autoscaling_config=dict(\n",
-    "            min_replicas=1, max_replicas=2,\n",
-    "        )\n",
-    "    ),\n",
-    "    # The accelerator_type and tensor_parallel_size are removed\n",
-    "    # or commented out as they are not applicable to the M1 chip\n",
-    "    engine_kwargs=dict(\n",
-    "        # tensor_parallel_size=2, # Removed for single-device inference\n",
-    "    ),\n",
-    ")\n",
-    "\n",
-    "app = build_openai_app({\"llm_configs\": [llm_config]})\n",
-    "serve.run(app, blocking=True)"
+    "serve.delete(\"PyTorchMLflowDeployment\")\n",
+    "serve.shutdown()"
    ]
   }
  ],

From b1fec5a88c4c67e3b819d0b1ebf75573c4acb11c Mon Sep 17 00:00:00 2001
From: Dima Drogovoz <drogowoz@gmail.com>
Date: Thu, 14 Aug 2025 13:29:55 +0100
Subject: [PATCH 4/6] install Python libraries via ray-cluster head/worker command

---
 ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index b0c56eaf..ef4a21e4 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -99,6 +99,9 @@ ray-cluster:
     resources:
       limits:
         memory: "8G"
+    command: ["pip", "install", "ray[serve,default,client]", "transformers", "torch", "torchvision", "accelerate", "scikit-learn"]
+  worker:
+    command: ["pip", "install", "ray[serve,default,client]", "transformers", "torch", "torchvision", "accelerate", "scikit-learn"]
 
 huggingface:
   # Provide your Hugging Face token here to download gated or private models.

From 399c4609f2cd8465bdf8181478395ebe2f1e05fc Mon Sep 17 00:00:00 2001
From: Dima Drogovoz <drogowoz@gmail.com>
Date: Thu, 14 Aug 2025 16:11:31 +0100
Subject: [PATCH 5/6] add resource requests/limits

---
 .../helm-chart/ai-starter-kit/values.yaml     | 31 +++++++++++++++++--
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index ef4a21e4..e0946668 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -85,6 +85,11 @@ jupyterhub:
         memory: 4Gi
   hub:
     password: "sneakypass"
+    resources:  # memory-only sizing for the hub; no cpu request or limit is set
+      limits:
+        memory: 512Mi
+      requests:
+        memory: 512Mi  # request equals the limit above
     # extraEnvVars:
     #     - name: "RAY_ADDRESS"
     #       value: "{{ .Release.Name }}-kuberay-head-svc"
@@ -96,12 +101,32 @@ ray-cluster:
     tag: 2.48.0.2c63f6-py312-cpu-aarch64
   head:
     serviceType: ClusterIP
-    resources:
-      limits:
-        memory: "8G"
     command: ["pip", "install", "ray[serve,default,client]", "transformers", "torch", "torchvision", "accelerate", "scikit-learn"]
+    resources:  # head group: request is the scheduling minimum, limit is the hard cap
+      requests:
+        cpu: "1"
+        memory: "2G"  # "G" is decimal (10^9 bytes); other blocks in this file use binary Mi/Gi
+      limits:
+        cpu: "4"
+        memory: "8G"  # decimal gigabytes, slightly less than 8Gi
   worker:
     command: ["pip", "install", "ray[serve,default,client]", "transformers", "torch", "torchvision", "accelerate", "scikit-learn"]
+    resources:  # worker group: same values as head.resources
+      requests:
+        cpu: "1"
+        memory: "2G"  # "G" is decimal (10^9 bytes), not Gi
+      limits:
+        cpu: "4"
+        memory: "8G"  # decimal gigabytes, slightly less than 8Gi
+
+mlflow:  # values passed to the mlflow subchart
+  resources:  # NOTE(review): confirm the subchart reads this path; it may expect tracking.resources
+    requests:
+      cpu: 100m  # millicores, i.e. 0.1 CPU
+      memory: 512Mi
+    limits:
+      cpu: 1000m  # millicores, i.e. 1 CPU
+      memory: 1Gi
 
 huggingface:
   # Provide your Hugging Face token here to download gated or private models.

From 9026beab7304483da76c7507015f62dc10c4f82e Mon Sep 17 00:00:00 2001
From: Dima Drogovoz <drogowoz@gmail.com>
Date: Thu, 21 Aug 2025 12:07:37 +0100
Subject: [PATCH 6/6] ship ray.ipynb as a ConfigMap and mount it at /tmp/ray.ipynb

---
 .../ai-starter-kit/templates/configmaps.yaml       | 14 +++++++++++++-
 .../helm-chart/ai-starter-kit/values.yaml          |  6 ++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
index 0eb760b6..33d0dfdd 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
@@ -38,4 +38,16 @@ metadata:
     helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
 data:
   welcome.ipynb: |-
-{{ .Files.Get "files/welcome.ipynb" | nindent 4 }}
\ No newline at end of file
+{{ .Files.Get "files/welcome.ipynb" | nindent 4 }}
+---
+apiVersion: v1
+kind: ConfigMap  # packages the chart file files/ray.ipynb under the single key ray.ipynb
+metadata:
+  name: {{ .Release.Name }}-ray-notebook
+  labels:
+    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
+    app.kubernetes.io/instance: {{ .Release.Name | quote }}
+    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
+data:  # NOTE(review): a ConfigMap is capped at 1 MiB; keep saved cell outputs in ray.ipynb small
+  ray.ipynb: |-
+{{ .Files.Get "files/ray.ipynb" | nindent 4 }}
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index e0946668..f01100c3 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -55,6 +55,9 @@ jupyterhub:
       - name: welcome-notebook
         configMap:
           name: "{{ .Release.Name }}-welcome-notebook"
+      - name: ray-notebook  # volume sourced from the release-prefixed ray-notebook ConfigMap
+        configMap:
+          name: "{{ .Release.Name }}-ray-notebook"
       - name: hf-token-secret
         secret:
           secretName: "{{ .Release.Name }}-hf-token-secret"
@@ -71,6 +74,9 @@ jupyterhub:
       - name: welcome-notebook
         mountPath: /tmp/welcome.ipynb
         subPath: welcome.ipynb
+      - name: ray-notebook  # must match the volume name declared for the ray-notebook ConfigMap
+        mountPath: /tmp/ray.ipynb
+        subPath: ray.ipynb  # mounts only this key as a file; subPath mounts do not receive ConfigMap updates
       - name: hf-token-secret
         mountPath: "/etc/secrets/huggingface"
         readOnly: true