From b71c7007c843993fbd2a3dbc7156010191a1bce5 Mon Sep 17 00:00:00 2001 From: Dima Drogovoz Date: Fri, 8 Aug 2025 12:16:39 +0100 Subject: [PATCH 1/6] change kuberay docker image --- .../helm-chart/ai-starter-kit/values.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml index 3b80b988..b0c56eaf 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -75,7 +75,7 @@ jupyterhub: mountPath: "/etc/secrets/huggingface" readOnly: true extraEnvVars: - RAY_ADDRESS: "{{ .Release.Name }}-kuberay-head-svc:6379" + RAY_ADDRESS: "ray://{{ .Release.Name }}-kuberay-head-svc:10001" MLFLOW_TRACKING_URI: "http://{{ .Release.Name }}-mlflow-tracking" # JUPYTERLAB_DIR: "/opt" resources: @@ -85,13 +85,15 @@ jupyterhub: memory: 4Gi hub: password: "sneakypass" - extraEnvVars: - - name: "RAY_ADDRESS" - value: "{{ .Release.Name }}-kuberay-head-svc" - - name: "MLFLOW_TRACKING_URI" - value: "http://{{ .Release.Name }}-mlflow-tracking" + # extraEnvVars: + # - name: "RAY_ADDRESS" + # value: "{{ .Release.Name }}-kuberay-head-svc" + # - name: "MLFLOW_TRACKING_URI" + # value: "http://{{ .Release.Name }}-mlflow-tracking" ray-cluster: + image: + tag: 2.48.0.2c63f6-py312-cpu-aarch64 head: serviceType: ClusterIP resources: From 0e5908d1eb703f6a4bb5a0ae70d53c6745dd8d7b Mon Sep 17 00:00:00 2001 From: Dima Drogovoz Date: Fri, 8 Aug 2025 12:18:32 +0100 Subject: [PATCH 2/6] add ray.ipynb --- .../helm-chart/ai-starter-kit/files/ray.ipynb | 295 ++++++++++++++++++ 1 file changed, 295 insertions(+) create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb new file mode 100644 index 00000000..daeb6c88 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb @@ -0,0 +1,295 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8dd42829-7ccc-4d00-9d5c-b206c93d6586", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting ray[client,default]\n", + " Downloading ray-2.48.0-cp312-cp312-manylinux2014_aarch64.whl.metadata (19 kB)\n", + "Requirement already satisfied: click>=7.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (8.2.1)\n", + "Requirement already satisfied: filelock in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (3.18.0)\n", + "Requirement already satisfied: jsonschema in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (4.24.0)\n", + "Collecting msgpack<2.0.0,>=1.0.0 (from ray[client,default])\n", + " Downloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (8.4 kB)\n", + "Requirement already satisfied: packaging in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (25.0)\n", + "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (6.31.1)\n", + "Requirement already satisfied: pyyaml in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (6.0.2)\n", + "Requirement already satisfied: requests in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (2.32.3)\n", + "Collecting grpcio (from ray[client,default])\n", + " Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl.metadata (3.8 kB)\n", + "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (3.12.12)\n", + "Collecting aiohttp_cors (from ray[client,default])\n", + " Downloading aiohttp_cors-0.8.1-py3-none-any.whl.metadata (20 kB)\n", + "Collecting colorful (from ray[client,default])\n", + " Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)\n", + "Collecting py-spy>=0.4.0 (from ray[client,default])\n", + " Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (510 bytes)\n", + "Collecting opencensus (from ray[client,default])\n", + " Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: opentelemetry-sdk>=1.30.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (1.36.0)\n", + "Collecting opentelemetry-exporter-prometheus (from ray[client,default])\n", + " Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl.metadata (1.8 kB)\n", + "Collecting opentelemetry-proto (from ray[client,default])\n", + " Downloading opentelemetry_proto-1.36.0-py3-none-any.whl.metadata (2.3 kB)\n", + "Requirement already satisfied: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (2.11.5)\n", + "Requirement already satisfied: prometheus_client>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (0.22.1)\n", + "Collecting smart_open (from ray[client,default])\n", + " Downloading smart_open-7.3.0.post1-py3-none-any.whl.metadata (24 kB)\n", + "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default])\n", + " Downloading virtualenv-20.33.1-py3-none-any.whl.metadata (4.5 kB)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.33.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (2.33.2)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (4.14.0)\n", + "Requirement already satisfied: typing-inspection>=0.4.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (0.4.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (6.4.4)\n", + "Requirement already satisfied: propcache>=0.2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.20.1)\n", + "Requirement already satisfied: idna>=2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from yarl<2.0,>=1.17.0->aiohttp>=3.7->ray[client,default]) (3.10)\n", + "Requirement already satisfied: opentelemetry-api==1.36.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default]) (1.36.0)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.57b0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default]) (0.57b0)\n", + "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default]) (8.7.0)\n", + "Requirement already satisfied: zipp>=3.20 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default]) (3.23.0)\n", + "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default])\n", + " Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default]) (4.3.8)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (2025.4.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (0.25.1)\n", + "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default])\n", + " Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB)\n", + "Requirement already satisfied: six~=1.16 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from opencensus->ray[client,default]) (1.17.0)\n", + "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default])\n", + " Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default])\n", + " Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)\n", + "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default])\n", + " Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)\n", + "Requirement already satisfied: google-auth<3.0.0,>=2.14.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (2.40.3)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (5.5.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (0.4.2)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (4.9.1)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (3.4.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (2.4.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (2025.4.26)\n", + "Requirement already satisfied: pyasn1>=0.1.3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (0.6.1)\n", + "Collecting wrapt (from smart_open->ray[client,default])\n", + " Downloading wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (6.4 kB)\n", + "Downloading ray-2.48.0-cp312-cp312-manylinux2014_aarch64.whl (69.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.2/69.2 MB\u001b[0m \u001b[31m39.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (425 kB)\n", + "Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl (5.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (2.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading virtualenv-20.33.1-py3-none-any.whl (6.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.1/6.1 MB\u001b[0m \u001b[31m47.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n", + "Downloading aiohttp_cors-0.8.1-py3-none-any.whl (25 kB)\n", + "Downloading colorful-0.5.7-py2.py3-none-any.whl (201 kB)\n", + "Downloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n", + "Downloading google_api_core-2.25.1-py3-none-any.whl (160 kB)\n", + "Downloading googleapis_common_protos-1.70.0-py3-none-any.whl (294 kB)\n", + "Downloading proto_plus-1.26.1-py3-none-any.whl (50 kB)\n", + "Downloading opencensus_context-0.1.3-py2.py3-none-any.whl (5.1 kB)\n", + "Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl (12 kB)\n", + "Downloading opentelemetry_proto-1.36.0-py3-none-any.whl (72 kB)\n", + "Downloading smart_open-7.3.0.post1-py3-none-any.whl (61 kB)\n", + "Downloading wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (88 kB)\n", + "Installing collected packages: py-spy, opencensus-context, distlib, colorful, wrapt, virtualenv, proto-plus, opentelemetry-proto, msgpack, grpcio, googleapis-common-protos, smart_open, google-api-core, aiohttp_cors, ray, opencensus, opentelemetry-exporter-prometheus\n", + "\u001b[33m WARNING: The script virtualenv is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[33m WARNING: The scripts ray, serve and tune are installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17/17\u001b[0m [opentelemetry-exporter-prometheus]\n", + "\u001b[1A\u001b[2KSuccessfully installed aiohttp_cors-0.8.1 colorful-0.5.7 distlib-0.4.0 google-api-core-2.25.1 googleapis-common-protos-1.70.0 grpcio-1.74.0 msgpack-1.1.1 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-exporter-prometheus-0.57b0 opentelemetry-proto-1.36.0 proto-plus-1.26.1 py-spy-0.4.1 ray-2.48.0 smart_open-7.3.0.post1 virtualenv-20.33.1 wrapt-1.17.2\n" + ] + } + ], + "source": [ + "!pip install \"ray[client,default]\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b137c8a2-4d77-4ef5-b6d0-8bfd07662c51", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-08 11:10:51,416\tINFO worker.py:1606 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n", + "2025-08-08 11:10:51,434\tINFO client_builder.py:242 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n", + "SIGTERM handler is not set because current thread is not the main thread.\n", + "2025-08-08 11:10:53,738\tWARNING utils.py:1280 -- Python patch version mismatch: The cluster was started with:\n", + " Ray: 2.48.0\n", + " Python: 3.12.9\n", + "This process on Ray Client was started with:\n", + " Ray: 2.48.0\n", + " Python: 3.12.10\n", + "\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cb1e38749b8a4ea49b2c2bd4e204e338", + "version_major": 2, + "version_minor": 0 + }, + "text/html": [ + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
Python version:3.12.9
Ray version:2.48.0
Dashboard:http://10.244.0.9:8265
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + "ClientContext(dashboard_url='10.244.0.9:8265', python_version='3.12.9', ray_version='2.48.0', ray_commit='2c63f6fdd1995ad8462a8333596a11f2e57f3e05', _num_clients=1, _context_to_restore=)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import ray\n", + "ray.init()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ad745c57-152a-4bb2-9d92-165dcd4a5789", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'node:10.244.0.9': 1.0, 'CPU': 2.0, 'memory': 9000000000.0, 'object_store_memory': 1823143525.0, 'node:__internal_head__': 1.0, 'node:10.244.0.10': 1.0}\n" + ] + } + ], + "source": [ + "@ray.remote\n", + "def cluster_resources():\n", + " return ray.cluster_resources()\n", + "\n", + "print(ray.get(cluster_resources.remote()))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "41b2fb19-69df-42e4-b5b7-f956ecf37bba", + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'vllm'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# here is the reference https://docs.ray.io/en/latest/serve/llm/serving-llms.html#deployment-through-llmrouter\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m serve\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMConfig, build_openai_app\n\u001b[32m 6\u001b[39m llm_config = LLMConfig(\n\u001b[32m 7\u001b[39m model_loading_config=\u001b[38;5;28mdict\u001b[39m(\n\u001b[32m 8\u001b[39m model_id=\u001b[33m\"\u001b[39m\u001b[33mqwen-0.5b\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 20\u001b[39m ),\n\u001b[32m 21\u001b[39m )\n\u001b[32m 23\u001b[39m app = build_openai_app({\u001b[33m\"\u001b[39m\u001b[33mllm_configs\u001b[39m\u001b[33m\"\u001b[39m: [llm_config]})\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/serve/llm/__init__.py:10\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING, Optional\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mconfigs\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserver_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 4\u001b[39m CloudMirrorConfig \u001b[38;5;28;01mas\u001b[39;00m _CloudMirrorConfig,\n\u001b[32m 5\u001b[39m LLMConfig \u001b[38;5;28;01mas\u001b[39;00m _LLMConfig,\n\u001b[32m (...)\u001b[39m\u001b[32m 8\u001b[39m ModelLoadingConfig \u001b[38;5;28;01mas\u001b[39;00m _ModelLoadingConfig,\n\u001b[32m 9\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_server\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 11\u001b[39m LLMServer \u001b[38;5;28;01mas\u001b[39;00m _LLMServer,\n\u001b[32m 12\u001b[39m )\n\u001b[32m 13\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrouters\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrouter\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 14\u001b[39m LLMRouter \u001b[38;5;28;01mas\u001b[39;00m _LLMRouter,\n\u001b[32m 15\u001b[39m )\n\u001b[32m 16\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutil\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mannotations\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PublicAPI\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/llm_server.py:52\u001b[39m\n\u001b[32m 48\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMEngine\n\u001b[32m 49\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmultiplex\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mlora_model_loader\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 50\u001b[39m LoraModelLoader,\n\u001b[32m 51\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m52\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m VLLMEngine\n\u001b[32m 53\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 54\u001b[39m VLLMEmbeddingRequest,\n\u001b[32m 55\u001b[39m )\n\u001b[32m 56\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbatcher\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAIResponseBatcher\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/vllm/vllm_engine.py:38\u001b[39m\n\u001b[32m 32\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMEngine\n\u001b[32m 33\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_engine_stats\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 34\u001b[39m ArgUsage,\n\u001b[32m 35\u001b[39m VLLMEngineStatTracker,\n\u001b[32m 36\u001b[39m usage_counters,\n\u001b[32m 37\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m38\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 39\u001b[39m KV_TRANSFER_PARAMS_KEY,\n\u001b[32m 40\u001b[39m VLLMEmbeddingRequest,\n\u001b[32m 41\u001b[39m VLLMEngineConfig,\n\u001b[32m 42\u001b[39m VLLMGenerationRequest,\n\u001b[32m 43\u001b[39m VLLMSamplingParams,\n\u001b[32m 44\u001b[39m )\n\u001b[32m 45\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mnode_initialization_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 46\u001b[39m InitializeNodeOutput,\n\u001b[32m 47\u001b[39m initialize_node,\n\u001b[32m 48\u001b[39m )\n\u001b[32m 49\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserver_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m floats_to_base64\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/vllm/vllm_models.py:6\u001b[39m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union\n\u001b[32m 5\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpydantic\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ConfigDict, Field, ValidationError, field_validator\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mengine\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01marg_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AsyncEngineArgs\n\u001b[32m 8\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbase_pydantic\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m BaseModelExtended\n\u001b[32m 9\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcloud_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m CloudMirrorConfig\n", + "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'vllm'" + ] + } + ], + "source": [ + "# here is the reference https://docs.ray.io/en/latest/serve/llm/serving-llms.html#deployment-through-llmrouter\n", + "\n", + "from ray import serve\n", + "from ray.serve.llm import LLMConfig, build_openai_app\n", + "\n", + "llm_config = LLMConfig(\n", + " model_loading_config=dict(\n", + " model_id=\"qwen-0.5b\",\n", + " model_source=\"Qwen/Qwen2.5-0.5B-Instruct\",\n", + " ),\n", + " deployment_config=dict(\n", + " autoscaling_config=dict(\n", + " min_replicas=1, max_replicas=2,\n", + " )\n", + " ),\n", + " # The accelerator_type and tensor_parallel_size are removed\n", + " # or commented out as they are not applicable to the M1 chip\n", + " engine_kwargs=dict(\n", + " # tensor_parallel_size=2, # Removed for single-device inference\n", + " ),\n", + ")\n", + "\n", + "app = build_openai_app({\"llm_configs\": [llm_config]})\n", + "serve.run(app, blocking=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 8c2762d2ee3b2cee49ad612bc17094310a599a18 Mon Sep 17 00:00:00 2001 From: Dima Drogovoz Date: Wed, 13 Aug 2025 16:26:55 +0100 Subject: [PATCH 3/6] update ray.ipynb --- .../helm-chart/ai-starter-kit/files/ray.ipynb | 541 ++++++++++++------ 1 file changed, 360 insertions(+), 181 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb index daeb6c88..ef1ea066 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb @@ -1,100 +1,271 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "1c5f9954-1e94-45f4-a4a0-a02d05bd268f", + "metadata": {}, + "source": [ + "# MLFlow and Ray example\n", + "\n", + "In this notebook we will train an ML model and deploy it in to Ray cluster." + ] + }, + { + "cell_type": "markdown", + "id": "a4f320db-fb65-40b3-89b1-789711133e3d", + "metadata": {}, + "source": [ + "## MLFlow experiment tracking\n", + "\n", + "Here we train the ML model and log metrics using MLFlow tracking server." + ] + }, { "cell_type": "code", "execution_count": 1, - "id": "8dd42829-7ccc-4d00-9d5c-b206c93d6586", + "id": "d17448f3-dc2f-4fa7-8c6c-58ec4e0f8f02", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/08/12 15:00:18 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_Prediction_PyTorch' does not exist. Creating a new experiment.\n", + "2025/08/12 15:00:18 WARNING mlflow.tracking.context.registry: Encountered unexpected error during resolving tags: 'getpwuid(): uid not found: 1001'\n", + "2025/08/12 15:00:18 WARNING mlflow.utils.git_utils: Failed to import Git (the Git executable is probably not on your PATH), so Git SHA is not available. Error: Failed to initialize: Bad git executable.\n", + "The git executable must be specified in one of the following ways:\n", + " - be included in your $PATH\n", + " - be set via $GIT_PYTHON_GIT_EXECUTABLE\n", + " - explicitly set via git.refresh()\n", + "\n", + "All git commands will error until this is rectified.\n", + "\n", + "This initial message can be silenced or aggravated in the future by setting the\n", + "$GIT_PYTHON_REFRESH environment variable. Use one of the following values:\n", + " - quiet|q|silence|s|silent|none|n|0: for no message or exception\n", + " - warn|w|warning|log|l|1: for a warning message (logging level CRITICAL, displayed by default)\n", + " - error|e|exception|raise|r|2: for a raised exception\n", + "\n", + "Example:\n", + " export GIT_PYTHON_REFRESH=quiet\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🏃 View run stately-kite-741 at: http://ai-starter-kit-mlflow:5000/#/experiments/1/runs/872da23f5a5541a39c0f893adbe53466\n", + "🧪 View experiment at: http://ai-starter-kit-mlflow:5000/#/experiments/1\n" + ] + } + ], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.model_selection import train_test_split\n", + "import numpy as np\n", + "import mlflow\n", + "import mlflow.pytorch\n", + "\n", + "# -------------------\n", + "# Prepare Data\n", + "# -------------------\n", + "mlflow.set_tracking_uri(uri=\"http://ai-starter-kit-mlflow:5000\")\n", + "data = load_diabetes()\n", + "X = data.data\n", + "y = data.target.reshape(-1, 1)\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42\n", + ")\n", + "\n", + "X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n", + "y_train_tensor = torch.tensor(y_train, dtype=torch.float32)\n", + "X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n", + "y_test_tensor = torch.tensor(y_test, dtype=torch.float32)\n", + "\n", + "# -------------------\n", + "# Define Model\n", + "# -------------------\n", + "class RegressionModel(nn.Module):\n", + " def __init__(self, input_dim):\n", + " super().__init__()\n", + " self.linear = nn.Linear(input_dim, 1)\n", + "\n", + " def forward(self, x):\n", + " return self.linear(x)\n", + "\n", + "input_dim = X_train.shape[1]\n", + "model = RegressionModel(input_dim)\n", + "\n", + "# -------------------\n", + "# Training\n", + "# -------------------\n", + "epochs = 100\n", + "lr = 0.01\n", + "\n", + "criterion = nn.MSELoss()\n", + "optimizer = optim.Adam(model.parameters(), lr=lr)\n", + "\n", + "mlflow.set_experiment(\"Diabetes_Prediction_PyTorch\")\n", + "\n", + "with mlflow.start_run():\n", + " mlflow.log_param(\"epochs\", epochs)\n", + " mlflow.log_param(\"learning_rate\", lr)\n", + " mlflow.log_param(\"optimizer\", \"Adam\")\n", + " mlflow.log_param(\"loss_fn\", \"MSELoss\")\n", + " mlflow.log_param(\"input_features\", input_dim)\n", + "\n", + " for epoch in range(epochs):\n", + " model.train()\n", + " optimizer.zero_grad()\n", + " outputs = model(X_train_tensor)\n", + " loss = criterion(outputs, y_train_tensor)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " # -------------------\n", + " # Evaluation\n", + " # -------------------\n", + " model.eval()\n", + " with torch.no_grad():\n", + " preds = model(X_test_tensor)\n", + " mse = criterion(preds, y_test_tensor).item()\n", + " rmse = np.sqrt(mse)\n", + "\n", + " mlflow.log_metric(\"mse\", mse)\n", + " mlflow.log_metric(\"rmse\", rmse)\n", + "\n", + " # # Log model to MLflow\n", + " # mlflow.pytorch.log_model(model, \"pytorch_model\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "208f1ba2-3ea2-4487-a18d-61778d83f5ae", + "metadata": {}, + "source": [ + "## Ray deployment\n", + "\n", + "In this step we will use the model from the previous step to deploy it to our Ray cluster." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d2c6009a-991a-49e7-b345-e3e6d8cea648", + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting ray[client,default]\n", + "Collecting ray[client,default,serve]\n", " Downloading ray-2.48.0-cp312-cp312-manylinux2014_aarch64.whl.metadata (19 kB)\n", - "Requirement already satisfied: click>=7.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (8.2.1)\n", - "Requirement already satisfied: filelock in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (3.18.0)\n", - "Requirement already satisfied: jsonschema in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (4.24.0)\n", - "Collecting msgpack<2.0.0,>=1.0.0 (from ray[client,default])\n", + "Requirement already satisfied: click>=7.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (8.2.1)\n", + "Requirement already satisfied: filelock in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (3.18.0)\n", + "Requirement already satisfied: jsonschema in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (4.24.0)\n", + "Collecting msgpack<2.0.0,>=1.0.0 (from ray[client,default,serve])\n", " Downloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (8.4 kB)\n", - "Requirement already satisfied: packaging in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (25.0)\n", - "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (6.31.1)\n", - "Requirement already satisfied: pyyaml in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (6.0.2)\n", - "Requirement already satisfied: requests in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (2.32.3)\n", - "Collecting grpcio (from ray[client,default])\n", + "Requirement already satisfied: packaging in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (25.0)\n", + "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (6.31.1)\n", + "Requirement already satisfied: pyyaml in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (6.0.2)\n", + "Requirement already satisfied: requests in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (2.32.3)\n", + "Collecting grpcio (from ray[client,default,serve])\n", " Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl.metadata (3.8 kB)\n", - "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (3.12.12)\n", - "Collecting aiohttp_cors (from ray[client,default])\n", + "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (3.12.12)\n", + "Collecting aiohttp_cors (from ray[client,default,serve])\n", " Downloading aiohttp_cors-0.8.1-py3-none-any.whl.metadata (20 kB)\n", - "Collecting colorful (from ray[client,default])\n", + "Collecting colorful (from ray[client,default,serve])\n", " Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)\n", - "Collecting py-spy>=0.4.0 (from ray[client,default])\n", + "Collecting py-spy>=0.4.0 (from ray[client,default,serve])\n", " Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (510 bytes)\n", - "Collecting opencensus (from ray[client,default])\n", + "Collecting opencensus (from ray[client,default,serve])\n", " Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)\n", - "Requirement already satisfied: opentelemetry-sdk>=1.30.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default]) (1.36.0)\n", - "Collecting opentelemetry-exporter-prometheus (from ray[client,default])\n", + "Requirement already satisfied: opentelemetry-sdk>=1.30.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (1.36.0)\n", + "Collecting opentelemetry-exporter-prometheus (from ray[client,default,serve])\n", " Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl.metadata (1.8 kB)\n", - "Collecting opentelemetry-proto (from ray[client,default])\n", + "Collecting opentelemetry-proto (from ray[client,default,serve])\n", " Downloading opentelemetry_proto-1.36.0-py3-none-any.whl.metadata (2.3 kB)\n", - "Requirement already satisfied: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (2.11.5)\n", - "Requirement already satisfied: prometheus_client>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default]) (0.22.1)\n", - "Collecting smart_open (from ray[client,default])\n", + "Requirement already satisfied: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (2.11.5)\n", + "Requirement already satisfied: prometheus_client>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from ray[client,default,serve]) (0.22.1)\n", + "Collecting smart_open (from ray[client,default,serve])\n", " Downloading smart_open-7.3.0.post1-py3-none-any.whl.metadata (24 kB)\n", - "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default])\n", + "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default,serve])\n", " Downloading virtualenv-20.33.1-py3-none-any.whl.metadata (4.5 kB)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.33.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (2.33.2)\n", - "Requirement already satisfied: typing-extensions>=4.12.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (4.14.0)\n", - "Requirement already satisfied: typing-inspection>=0.4.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default]) (0.4.1)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (2.6.1)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (25.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.7.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (6.4.4)\n", - "Requirement already satisfied: propcache>=0.2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (0.3.2)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default]) (1.20.1)\n", - "Requirement already satisfied: idna>=2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from yarl<2.0,>=1.17.0->aiohttp>=3.7->ray[client,default]) (3.10)\n", - "Requirement already satisfied: opentelemetry-api==1.36.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default]) (1.36.0)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.57b0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default]) (0.57b0)\n", - "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default]) (8.7.0)\n", - "Requirement already satisfied: zipp>=3.20 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default]) (3.23.0)\n", - "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default])\n", + "Collecting watchfiles (from ray[client,default,serve])\n", + " Downloading watchfiles-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (4.9 kB)\n", + "Requirement already satisfied: uvicorn[standard] in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (0.35.0)\n", + "Requirement already satisfied: fastapi in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (0.116.1)\n", + "Requirement already satisfied: starlette in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from ray[client,default,serve]) (0.47.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default,serve]) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.33.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default,serve]) (2.33.2)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default,serve]) (4.14.0)\n", + "Requirement already satisfied: typing-inspection>=0.4.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[client,default,serve]) (0.4.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (6.4.4)\n", + "Requirement already satisfied: propcache>=0.2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.20.1)\n", + "Requirement already satisfied: idna>=2.0 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from yarl<2.0,>=1.17.0->aiohttp>=3.7->ray[client,default,serve]) (3.10)\n", + "Requirement already satisfied: opentelemetry-api==1.36.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default,serve]) (1.36.0)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.57b0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-sdk>=1.30.0->ray[client,default,serve]) (0.57b0)\n", + "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default,serve]) (8.7.0)\n", + "Requirement already satisfied: zipp>=3.20 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api==1.36.0->opentelemetry-sdk>=1.30.0->ray[client,default,serve]) (3.23.0)\n", + "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve])\n", " Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB)\n", - "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default]) (4.3.8)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (2025.4.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (0.36.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default]) (0.25.1)\n", - "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default])\n", + "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve]) (4.3.8)\n", + "Requirement already satisfied: anyio<5,>=3.6.2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from starlette->ray[client,default,serve]) (4.9.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from anyio<5,>=3.6.2->starlette->ray[client,default,serve]) (1.3.1)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default,serve]) (2025.4.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default,serve]) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from jsonschema->ray[client,default,serve]) (0.25.1)\n", + "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default,serve])\n", " Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB)\n", - "Requirement already satisfied: six~=1.16 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from opencensus->ray[client,default]) (1.17.0)\n", - "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default])\n", + "Requirement already satisfied: six~=1.16 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from opencensus->ray[client,default,serve]) (1.17.0)\n", + "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default,serve])\n", " Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)\n", - "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default])\n", + "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", " Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)\n", - "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default])\n", + "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", " Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)\n", - "Requirement already satisfied: google-auth<3.0.0,>=2.14.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (2.40.3)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (5.5.2)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (0.4.2)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (4.9.1)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (3.4.2)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (2.4.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default]) (2025.4.26)\n", - "Requirement already satisfied: pyasn1>=0.1.3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default]) (0.6.1)\n", - "Collecting wrapt (from smart_open->ray[client,default])\n", - " Downloading wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (6.4 kB)\n", + "Requirement already satisfied: google-auth<3.0.0,>=2.14.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (2.40.3)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (5.5.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (0.4.2)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages (from google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (4.9.1)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default,serve]) (3.4.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default,serve]) (2.4.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from requests->ray[client,default,serve]) (2025.4.26)\n", + "Requirement already satisfied: pyasn1>=0.1.3 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth<3.0.0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve]) (0.6.1)\n", + "Collecting wrapt (from smart_open->ray[client,default,serve])\n", + " Downloading wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl.metadata (6.4 kB)\n", + "Requirement already satisfied: h11>=0.8 in /opt/bitnami/miniforge/lib/python3.12/site-packages (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve]) (0.16.0)\n", + "Collecting httptools>=0.6.3 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (3.6 kB)\n", + "Collecting python-dotenv>=0.13 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)\n", + "Collecting uvloop>=0.15.1 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (4.9 kB)\n", + "Collecting websockets>=10.4 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (6.8 kB)\n", "Downloading ray-2.48.0-cp312-cp312-manylinux2014_aarch64.whl (69.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.2/69.2 MB\u001b[0m \u001b[31m39.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (425 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.2/69.2 MB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "Downloading msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (425 kB)\n", "Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl (5.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m38.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (2.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m37.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading virtualenv-20.33.1-py3-none-any.whl (6.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.1/6.1 MB\u001b[0m \u001b[31m47.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.1/6.1 MB\u001b[0m \u001b[31m40.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "Downloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n", "Downloading aiohttp_cors-0.8.1-py3-none-any.whl (25 kB)\n", "Downloading colorful-0.5.7-py2.py3-none-any.whl (201 kB)\n", "Downloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n", @@ -105,169 +276,177 @@ "Downloading opentelemetry_exporter_prometheus-0.57b0-py3-none-any.whl (12 kB)\n", "Downloading opentelemetry_proto-1.36.0-py3-none-any.whl (72 kB)\n", "Downloading smart_open-7.3.0.post1-py3-none-any.whl (61 kB)\n", - "Downloading wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (88 kB)\n", - "Installing collected packages: py-spy, opencensus-context, distlib, colorful, wrapt, virtualenv, proto-plus, opentelemetry-proto, msgpack, grpcio, googleapis-common-protos, smart_open, google-api-core, aiohttp_cors, ray, opencensus, opentelemetry-exporter-prometheus\n", + "Downloading httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (508 kB)\n", + "Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)\n", + "Downloading uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (4.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m32.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchfiles-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (450 kB)\n", + "Downloading websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (183 kB)\n", + "Downloading wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl (88 kB)\n", + "Installing collected packages: py-spy, opencensus-context, distlib, colorful, wrapt, websockets, virtualenv, uvloop, python-dotenv, proto-plus, opentelemetry-proto, msgpack, httptools, grpcio, googleapis-common-protos, watchfiles, smart_open, google-api-core, aiohttp_cors, ray, opencensus, opentelemetry-exporter-prometheus\n", + "\u001b[33m WARNING: The script websockets is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", "\u001b[33m WARNING: The script virtualenv is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n", " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[33m WARNING: The script dotenv is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[33m WARNING: The script watchfiles is installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", "\u001b[33m WARNING: The scripts ray, serve and tune are installed in '/opt/bitnami/jupyterhub-singleuser/.local/bin' which is not on PATH.\n", " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17/17\u001b[0m [opentelemetry-exporter-prometheus]\n", - "\u001b[1A\u001b[2KSuccessfully installed aiohttp_cors-0.8.1 colorful-0.5.7 distlib-0.4.0 google-api-core-2.25.1 googleapis-common-protos-1.70.0 grpcio-1.74.0 msgpack-1.1.1 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-exporter-prometheus-0.57b0 opentelemetry-proto-1.36.0 proto-plus-1.26.1 py-spy-0.4.1 ray-2.48.0 smart_open-7.3.0.post1 virtualenv-20.33.1 wrapt-1.17.2\n" + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m22/22\u001b[0m [opentelemetry-exporter-prometheus]\n", + "\u001b[1A\u001b[2KSuccessfully installed aiohttp_cors-0.8.1 colorful-0.5.7 distlib-0.4.0 google-api-core-2.25.1 googleapis-common-protos-1.70.0 grpcio-1.74.0 httptools-0.6.4 msgpack-1.1.1 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-exporter-prometheus-0.57b0 opentelemetry-proto-1.36.0 proto-plus-1.26.1 py-spy-0.4.1 python-dotenv-1.1.1 ray-2.48.0 smart_open-7.3.0.post1 uvloop-0.21.0 virtualenv-20.33.1 watchfiles-1.1.0 websockets-15.0.1 wrapt-1.17.3\n" ] } ], "source": [ - "!pip install \"ray[client,default]\"" + "!pip install \"ray[serve,client,default]\"" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "b137c8a2-4d77-4ef5-b6d0-8bfd07662c51", + "execution_count": 3, + "id": "5be0b7fa-0815-4224-a4e6-ad5b74647e20", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2025-08-08 11:10:51,416\tINFO worker.py:1606 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n", - "2025-08-08 11:10:51,434\tINFO client_builder.py:242 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n", + "2025-08-12 15:00:37,849\tINFO worker.py:1606 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n", + "2025-08-12 15:00:37,850\tINFO client_builder.py:242 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n", "SIGTERM handler is not set because current thread is not the main thread.\n", - "2025-08-08 11:10:53,738\tWARNING utils.py:1280 -- Python patch version mismatch: The cluster was started with:\n", + "2025-08-12 15:00:39,818\tWARNING utils.py:1280 -- Python patch version mismatch: The cluster was started with:\n", " Ray: 2.48.0\n", " Python: 3.12.9\n", "This process on Ray Client was started with:\n", " Ray: 2.48.0\n", " Python: 3.12.10\n", - "\n" + "\n", + "\u001b[36m(ProxyActor pid=3272)\u001b[0m INFO 2025-08-12 08:00:44,231 proxy 10.244.0.9 -- Proxy starting on node 66724a5e2332cd618965646d0b7ab0d4d89622990053ab677dc8f588 (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=3272)\u001b[0m INFO 2025-08-12 08:00:44,333 proxy 10.244.0.9 -- Got updated endpoints: {}.\n", + "INFO 2025-08-12 15:00:44,830 serve 72 -- Started Serve in namespace \"serve\".\n", + "\u001b[36m(ServeController pid=3174)\u001b[0m INFO 2025-08-12 08:00:46,387 controller 3174 -- Deploying new version of Deployment(name='PyTorchMLflowDeployment', app='default') (initial target replicas: 1).\n", + "\u001b[36m(ProxyActor pid=3272)\u001b[0m INFO 2025-08-12 08:00:46,396 proxy 10.244.0.9 -- Got updated endpoints: {Deployment(name='PyTorchMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ProxyActor pid=3272)\u001b[0m INFO 2025-08-12 08:00:46,423 proxy 10.244.0.9 -- Started .\n", + "\u001b[36m(ServeController pid=3174)\u001b[0m INFO 2025-08-12 08:00:46,503 controller 3174 -- Adding 1 replica to Deployment(name='PyTorchMLflowDeployment', app='default').\n" ] }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cb1e38749b8a4ea49b2c2bd4e204e338", - "version_major": 2, - "version_minor": 0 - }, - "text/html": [ - "
\n", - "
\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "\n", - "
Python version:3.12.9
Ray version:2.48.0
Dashboard:http://10.244.0.9:8265
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - "ClientContext(dashboard_url='10.244.0.9:8265', python_version='3.12.9', ray_version='2.48.0', ray_commit='2c63f6fdd1995ad8462a8333596a11f2e57f3e05', _num_clients=1, _context_to_restore=)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import ray\n", - "ray.init()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ad745c57-152a-4bb2-9d92-165dcd4a5789", - "metadata": {}, - "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'node:10.244.0.9': 1.0, 'CPU': 2.0, 'memory': 9000000000.0, 'object_store_memory': 1823143525.0, 'node:__internal_head__': 1.0, 'node:10.244.0.10': 1.0}\n" + "\u001b[36m(ServeReplica:default:PyTorchMLflowDeployment pid=1940, ip=10.244.0.10)\u001b[0m Loading model from MLflow...\n", + "\u001b[36m(ServeReplica:default:PyTorchMLflowDeployment pid=1940, ip=10.244.0.10)\u001b[0m Model loaded successfully.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO 2025-08-12 15:00:50,456 serve 72 -- Application 'default' is ready at http://127.0.0.1:8000/predict.\n", + "INFO 2025-08-12 15:00:50,519 serve 72 -- Started .\n" + ] + }, + { + "data": { + "text/plain": [ + "DeploymentHandle(deployment='PyTorchMLflowDeployment')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING 2025-08-12 15:00:50,695 serve 72 -- Failed to get queue length from Replica(id='7iflc5wc', deployment='PyTorchMLflowDeployment', app='default') within 0.1s. If this happens repeatedly it's likely caused by high network latency in the cluster. You can configure the deadline using the `RAY_SERVE_QUEUE_LENGTH_RESPONSE_DEADLINE_S` environment variable.\n", + "2025-08-12 15:00:50,725\tERROR dataclient.py:312 -- Callback error:\n", + "Traceback (most recent call last):\n", + " File \"/opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages/ray/util/client/dataclient.py\", line 301, in _process_response\n", + " can_remove = callback(response)\n", + " ^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages/ray/util/client/dataclient.py\", line 179, in __call__\n", + " self.callback(self.data)\n", + " File \"/opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages/ray/util/client/common.py\", line 179, in deserialize_obj\n", + " py_callback(data)\n", + " File \"/opt/bitnami/jupyterhub-singleuser/.local/lib/python3.12/site-packages/ray/util/client/common.py\", line 147, in set_future\n", + " fut.set_result(data)\n", + " File \"/opt/bitnami/miniforge/lib/python3.12/concurrent/futures/_base.py\", line 544, in set_result\n", + " raise InvalidStateError('{}: {!r}'.format(self._state, self))\n", + "concurrent.futures._base.InvalidStateError: CANCELLED: \n", + "\u001b[36m(ProxyActor pid=1993, ip=10.244.0.10)\u001b[0m INFO 2025-08-12 08:00:51,185 proxy 10.244.0.10 -- Proxy starting on node 1d5e87519ede5427735866024d720fbcd019925255178b592dd245a3 (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=1993, ip=10.244.0.10)\u001b[0m INFO 2025-08-12 08:00:51,223 proxy 10.244.0.10 -- Got updated endpoints: {Deployment(name='PyTorchMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ProxyActor pid=1993, ip=10.244.0.10)\u001b[0m INFO 2025-08-12 08:00:51,230 proxy 10.244.0.10 -- Started .\n", + "\u001b[36m(ServeReplica:default:PyTorchMLflowDeployment pid=1940, ip=10.244.0.10)\u001b[0m INFO 2025-08-12 08:01:29,315 default_PyTorchMLflowDeployment 7iflc5wc 4d7cb9a3-7b88-40e3-ab03-d933717e195a -- POST /predict 200 40.5ms\n", + "\u001b[36m(ServeController pid=3174)\u001b[0m INFO 2025-08-12 08:01:38,376 controller 3174 -- Removing 1 replica from Deployment(name='PyTorchMLflowDeployment', app='default').\n", + "\u001b[36m(ServeController pid=3174)\u001b[0m INFO 2025-08-12 08:01:40,427 controller 3174 -- Replica(id='7iflc5wc', deployment='PyTorchMLflowDeployment', app='default') is stopped.\n" ] } ], "source": [ - "@ray.remote\n", - "def cluster_resources():\n", - " return ray.cluster_resources()\n", + "import torch\n", + "import mlflow.pytorch\n", + "import numpy as np\n", + "from starlette.requests import Request\n", + "from typing import Dict\n", "\n", - "print(ray.get(cluster_resources.remote()))" + "from ray import serve\n", + "import ray\n", + "\n", + "# ray.init(\"ray://ai-starter-kit-kuberay-head-svc:10001\", namespace=\"my_new_namespace\")\n", + "\n", + "# MLFLOW_MODEL_URI = \"mlruns/0//artifacts/pytorch_model\" # Change to your run path\n", + "\n", + "@serve.deployment\n", + "class PyTorchMLflowDeployment:\n", + " def __init__(self):\n", + " print(\"Loading model from MLflow...\")\n", + " # # self.model = mlflow.pytorch.load_model(MLFLOW_MODEL_URI)\n", + " self.model = model\n", + " self.model.eval()\n", + " print(\"Model loaded successfully.\")\n", + "\n", + " async def __call__(self, request: Request) -> Dict:\n", + " try:\n", + " data = await request.json()\n", + " features = data.get(\"features\", None)\n", + " if features is None:\n", + " return {\"error\": \"Missing 'features' in request\"}\n", + "\n", + " X = np.array(features).reshape(1, -1)\n", + " X_tensor = torch.tensor(X, dtype=torch.float32)\n", + "\n", + " with torch.no_grad():\n", + " prediction = self.model(X_tensor).numpy().tolist()\n", + "\n", + " return {\"prediction\": prediction}\n", + " except Exception as e:\n", + " return {\"error\": str(e)}\n", + "\n", + "app = PyTorchMLflowDeployment.bind()\n", + "serve.run(app, route_prefix=\"/predict\")\n" ] }, { "cell_type": "code", "execution_count": 4, - "id": "41b2fb19-69df-42e4-b5b7-f956ecf37bba", + "id": "1e29a0ae-7514-4881-ae81-99b4e01e4e1f", "metadata": {}, "outputs": [ { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'vllm'", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# here is the reference https://docs.ray.io/en/latest/serve/llm/serving-llms.html#deployment-through-llmrouter\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m serve\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMConfig, build_openai_app\n\u001b[32m 6\u001b[39m llm_config = LLMConfig(\n\u001b[32m 7\u001b[39m model_loading_config=\u001b[38;5;28mdict\u001b[39m(\n\u001b[32m 8\u001b[39m model_id=\u001b[33m\"\u001b[39m\u001b[33mqwen-0.5b\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 20\u001b[39m ),\n\u001b[32m 21\u001b[39m )\n\u001b[32m 23\u001b[39m app = build_openai_app({\u001b[33m\"\u001b[39m\u001b[33mllm_configs\u001b[39m\u001b[33m\"\u001b[39m: [llm_config]})\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/serve/llm/__init__.py:10\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING, Optional\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mconfigs\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserver_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 4\u001b[39m CloudMirrorConfig \u001b[38;5;28;01mas\u001b[39;00m _CloudMirrorConfig,\n\u001b[32m 5\u001b[39m LLMConfig \u001b[38;5;28;01mas\u001b[39;00m _LLMConfig,\n\u001b[32m (...)\u001b[39m\u001b[32m 8\u001b[39m ModelLoadingConfig \u001b[38;5;28;01mas\u001b[39;00m _ModelLoadingConfig,\n\u001b[32m 9\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_server\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 11\u001b[39m LLMServer \u001b[38;5;28;01mas\u001b[39;00m _LLMServer,\n\u001b[32m 12\u001b[39m )\n\u001b[32m 13\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrouters\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mrouter\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 14\u001b[39m LLMRouter \u001b[38;5;28;01mas\u001b[39;00m _LLMRouter,\n\u001b[32m 15\u001b[39m )\n\u001b[32m 16\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutil\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mannotations\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PublicAPI\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/llm_server.py:52\u001b[39m\n\u001b[32m 48\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMEngine\n\u001b[32m 49\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmultiplex\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mlora_model_loader\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 50\u001b[39m LoraModelLoader,\n\u001b[32m 51\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m52\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m VLLMEngine\n\u001b[32m 53\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 54\u001b[39m VLLMEmbeddingRequest,\n\u001b[32m 55\u001b[39m )\n\u001b[32m 56\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbatcher\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAIResponseBatcher\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/vllm/vllm_engine.py:38\u001b[39m\n\u001b[32m 32\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_engine\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m LLMEngine\n\u001b[32m 33\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_engine_stats\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 34\u001b[39m ArgUsage,\n\u001b[32m 35\u001b[39m VLLMEngineStatTracker,\n\u001b[32m 36\u001b[39m usage_counters,\n\u001b[32m 37\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m38\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvllm_models\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 39\u001b[39m KV_TRANSFER_PARAMS_KEY,\n\u001b[32m 40\u001b[39m VLLMEmbeddingRequest,\n\u001b[32m 41\u001b[39m VLLMEngineConfig,\n\u001b[32m 42\u001b[39m VLLMGenerationRequest,\n\u001b[32m 43\u001b[39m VLLMSamplingParams,\n\u001b[32m 44\u001b[39m )\n\u001b[32m 45\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mnode_initialization_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[32m 46\u001b[39m InitializeNodeOutput,\n\u001b[32m 47\u001b[39m initialize_node,\n\u001b[32m 48\u001b[39m )\n\u001b[32m 49\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserve\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdeployments\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mserver_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m floats_to_base64\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/ray/llm/_internal/serve/deployments/llm/vllm/vllm_models.py:6\u001b[39m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union\n\u001b[32m 5\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpydantic\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ConfigDict, Field, ValidationError, field_validator\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mvllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mengine\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01marg_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AsyncEngineArgs\n\u001b[32m 8\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbase_pydantic\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m BaseModelExtended\n\u001b[32m 9\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mray\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01m_internal\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcloud_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m CloudMirrorConfig\n", - "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'vllm'" + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO 2025-08-12 15:01:38,215 serve 72 -- Deleting app ['PyTorchMLflowDeployment']\n" ] } ], "source": [ - "# here is the reference https://docs.ray.io/en/latest/serve/llm/serving-llms.html#deployment-through-llmrouter\n", - "\n", - "from ray import serve\n", - "from ray.serve.llm import LLMConfig, build_openai_app\n", - "\n", - "llm_config = LLMConfig(\n", - " model_loading_config=dict(\n", - " model_id=\"qwen-0.5b\",\n", - " model_source=\"Qwen/Qwen2.5-0.5B-Instruct\",\n", - " ),\n", - " deployment_config=dict(\n", - " autoscaling_config=dict(\n", - " min_replicas=1, max_replicas=2,\n", - " )\n", - " ),\n", - " # The accelerator_type and tensor_parallel_size are removed\n", - " # or commented out as they are not applicable to the M1 chip\n", - " engine_kwargs=dict(\n", - " # tensor_parallel_size=2, # Removed for single-device inference\n", - " ),\n", - ")\n", - "\n", - "app = build_openai_app({\"llm_configs\": [llm_config]})\n", - "serve.run(app, blocking=True)" + "serve.delete(\"PyTorchMLflowDeployment\")\n", + "serve.shutdown()" ] } ], From b1fec5a88c4c67e3b819d0b1ebf75573c4acb11c Mon Sep 17 00:00:00 2001 From: Dima Drogovoz Date: Thu, 14 Aug 2025 13:29:55 +0100 Subject: [PATCH 4/6] add libraries to ray-cluster --- ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml index b0c56eaf..ef4a21e4 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -99,6 +99,9 @@ ray-cluster: resources: limits: memory: "8G" + command: ["pip", "install", "ray[serve,default,client]", "transformers", "torch", "torchvision", "accelerate", "scikit-learn"] + worker: + command: ["pip", "install", "ray[serve,default,client]", "transformers", "torch", "torchvision", "accelerate", "scikit-learn"] huggingface: # Provide your Hugging Face token here to download gated or private models. From 399c4609f2cd8465bdf8181478395ebe2f1e05fc Mon Sep 17 00:00:00 2001 From: Dima Drogovoz Date: Thu, 14 Aug 2025 16:11:31 +0100 Subject: [PATCH 5/6] add resource requests/limits --- .../helm-chart/ai-starter-kit/values.yaml | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml index ef4a21e4..e0946668 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -85,6 +85,11 @@ jupyterhub: memory: 4Gi hub: password: "sneakypass" + resources: + limits: + memory: 512Mi + requests: + memory: 512Mi # extraEnvVars: # - name: "RAY_ADDRESS" # value: "{{ .Release.Name }}-kuberay-head-svc" @@ -96,12 +101,32 @@ ray-cluster: tag: 2.48.0.2c63f6-py312-cpu-aarch64 head: serviceType: ClusterIP - resources: - limits: - memory: "8G" command: ["pip", "install", "ray[serve,default,client]", "transformers", "torch", "torchvision", "accelerate", "scikit-learn"] + resources: + requests: + cpu: "1" + memory: "2G" + limits: + cpu: "4" + memory: "8G" worker: command: ["pip", "install", "ray[serve,default,client]", "transformers", "torch", "torchvision", "accelerate", "scikit-learn"] + resources: + requests: + cpu: "1" + memory: "2G" + limits: + cpu: "4" + memory: "8G" + +mlflow: + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi huggingface: # Provide your Hugging Face token here to download gated or private models. From 9026beab7304483da76c7507015f62dc10c4f82e Mon Sep 17 00:00:00 2001 From: Dima Drogovoz Date: Thu, 21 Aug 2025 12:07:37 +0100 Subject: [PATCH 6/6] add ray.ipynb to jupyter user notebook --- .../ai-starter-kit/templates/configmaps.yaml | 14 +++++++++++++- .../helm-chart/ai-starter-kit/values.yaml | 6 ++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml index 0eb760b6..33d0dfdd 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml @@ -38,4 +38,16 @@ metadata: helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" data: welcome.ipynb: |- -{{ .Files.Get "files/welcome.ipynb" | nindent 4 }} \ No newline at end of file +{{ .Files.Get "files/welcome.ipynb" | nindent 4 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-ray-notebook + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +data: + ray.ipynb: |- +{{ .Files.Get "files/ray.ipynb" | nindent 4 }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml index e0946668..f01100c3 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -55,6 +55,9 @@ jupyterhub: - name: welcome-notebook configMap: name: "{{ .Release.Name }}-welcome-notebook" + - name: ray-notebook + configMap: + name: "{{ .Release.Name }}-ray-notebook" - name: hf-token-secret secret: secretName: "{{ .Release.Name }}-hf-token-secret" @@ -71,6 +74,9 @@ jupyterhub: - name: welcome-notebook mountPath: /tmp/welcome.ipynb subPath: welcome.ipynb + - name: ray-notebook + mountPath: /tmp/ray.ipynb + subPath: ray.ipynb - name: hf-token-secret mountPath: "/etc/secrets/huggingface" readOnly: true