From 4e1b0b878608d2046d1898b69c46e1b9fa608aa2 Mon Sep 17 00:00:00 2001
From: Aleksandar Stefanovic
Date: Mon, 1 Sep 2025 19:06:20 +0200
Subject: [PATCH 1/2] Adding in multi-agent notebook

---
 .../helm-chart/ai-starter-kit/values-gke.yaml |  16 +-
 ai/ai-starter-kit/notebooks/multi-agent.ipynb | 621 ++++++++++++++++++
 2 files changed, 629 insertions(+), 8 deletions(-)
 create mode 100644 ai/ai-starter-kit/notebooks/multi-agent.ipynb

diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml
index a641cc45..ebe9da79 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml
@@ -142,12 +142,12 @@ ray-cluster:
       serviceType: ClusterIP
       resources:
         requests:
-          cpu: "1"
-          memory: "2G"
+          cpu: "4"
+          memory: "4G"
           ephemeral-storage: 10Gi
         limits:
-          cpu: "4"
-          memory: "8G"
+          cpu: "8"
+          memory: "6G"
           ephemeral-storage: 10Gi
       volumes:
         - name: ray-pvc-storage
@@ -161,12 +161,12 @@ ray-cluster:
     worker:
       resources:
         requests:
-          cpu: "1"
-          memory: "2G"
+          cpu: "4"
+          memory: "4G"
           ephemeral-storage: 10Gi
         limits:
-          cpu: "4"
-          memory: "8G"
+          cpu: "8"
+          memory: "6G"
           ephemeral-storage: 10Gi
       volumes:
         - name: ray-pvc-storage
diff --git a/ai/ai-starter-kit/notebooks/multi-agent.ipynb b/ai/ai-starter-kit/notebooks/multi-agent.ipynb
new file mode 100644
index 00000000..ea2f3caa
--- /dev/null
+++ b/ai/ai-starter-kit/notebooks/multi-agent.ipynb
@@ -0,0 +1,621 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e",
+   "metadata": {},
+   "source": [
+    "### Cell 1 - Initialize Ray endpoints and verify dashboard\n",
+    "\n",
+    "Installs requests, derives the Ray head host from RAY_ADDRESS, builds Dashboard/Serve/MLflow URLs, reads a Hugging Face token, and prints the endpoints plus the Jobs API version for a quick health check.",
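+    "\n",
+    "If the cluster is still coming up, the one-shot version check in the code below can fail. A small retry helper (a sketch only, reusing the same service name and endpoint as this notebook) makes the first run less flaky:\n",
+    "\n",
+    "```python\n",
+    "import time, requests\n",
+    "\n",
+    "DASH_URL = \"http://ai-starter-kit-kuberay-head-svc:8265\"\n",
+    "\n",
+    "def wait_for_dashboard(url, attempts=30, delay=2.0):\n",
+    "    # Poll the dashboard's version endpoint until it answers or we give up.\n",
+    "    for _ in range(attempts):\n",
+    "        try:\n",
+    "            r = requests.get(f\"{url}/api/version\", timeout=5)\n",
+    "            if r.ok:\n",
+    "                return r.json()\n",
+    "        except requests.RequestException:\n",
+    "            pass\n",
+    "        time.sleep(delay)\n",
+    "    raise TimeoutError(f\"Ray dashboard not reachable at {url}\")\n",
+    "\n",
+    "print(wait_for_dashboard(DASH_URL))\n",
+    "```"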
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install requests==2.* --disable-pip-version-check\n", + "\n", + "import os, textwrap, base64, time, json, requests\n", + "from string import Template\n", + "\n", + "raw_addr = os.getenv(\"RAY_ADDRESS\", \"ray://ai-starter-kit-kuberay-head-svc:10001\")\n", + "if raw_addr.startswith(\"ray://\"):\n", + " HEAD_HOST = raw_addr.split(\"://\", 1)[1].split(\":\", 1)[0]\n", + "else:\n", + " HEAD_HOST = raw_addr.split(\":\", 1)[0] or \"ai-starter-kit-kuberay-head-svc\"\n", + "\n", + "DASH_URL = f\"http://{HEAD_HOST}:8265\"\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"8000\"))\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "HF_TOKEN_PATH = \"/etc/secrets/huggingface/token\"\n", + "HF_TOKEN = \"\"\n", + "if os.path.exists(HF_TOKEN_PATH):\n", + " try:\n", + " HF_TOKEN = open(HF_TOKEN_PATH).read().strip()\n", + " except Exception:\n", + " HF_TOKEN = \"\"\n", + "\n", + "print(\"Head host:\", HEAD_HOST)\n", + "print(\"Jobs API :\", f\"{DASH_URL}/api/jobs/\")\n", + "print(\"Serve URL:\", f\"http://{HEAD_HOST}:{SERVE_PORT}{SERVE_ROUTE}\")\n", + "print(\"MLflow :\", os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\"))\n", + "\n", + "print(\"Jobs API version:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Deploy a minimal Ray Serve smoke test and verify readiness\n", + "\n", + "Submits a tiny FastAPI app to Ray Serve (one /healthz endpoint under /smoke) as a Ray Job, installing FastAPI on the fly. It polls the Jobs API for status and hits :8000/smoke/healthz up to 60 seconds, printing when the service responds 200 (i.e., smoke test passes)." 
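+    "\n",
+    "If the job never reaches RUNNING, the driver logs usually say why. A hedged sketch for pulling them (it assumes the Jobs REST API exposes /api/jobs/<id>/logs, which recent Ray releases do):\n",
+    "\n",
+    "```python\n",
+    "import requests\n",
+    "\n",
+    "DASH_URL = \"http://ai-starter-kit-kuberay-head-svc:8265\"\n",
+    "\n",
+    "def job_logs(job_id):\n",
+    "    # Fetch the captured driver output for a submitted Ray job.\n",
+    "    r = requests.get(f\"{DASH_URL}/api/jobs/{job_id}/logs\", timeout=10)\n",
+    "    r.raise_for_status()\n",
+    "    return r.json().get(\"logs\", \"\")\n",
+    "\n",
+    "# print(job_logs(job_id))  # job_id comes from the submit call below\n",
+    "```"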
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, time, requests\n", + "\n", + "DASH_URL = \"http://ai-starter-kit-kuberay-head-svc:8265\"\n", + "\n", + "print(\"Jobs API:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n", + "\n", + "serve_py = textwrap.dedent(\"\"\"\n", + " from fastapi import FastAPI\n", + " from ray import serve\n", + " serve.start(detached=True, http_options={\"host\":\"0.0.0.0\",\"port\":8000})\n", + " app = FastAPI()\n", + "\n", + " @serve.deployment(name=\"smoke\", num_replicas=1)\n", + " @serve.ingress(app)\n", + " class Smoke:\n", + " @app.get(\"/healthz\")\n", + " async def health(self): return {\"ok\": True}\n", + "\n", + " serve.run(Smoke.bind(), route_prefix=\"/smoke\")\n", + " print(\"READY: smoke\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "b64 = base64.b64encode(serve_py.encode()).decode()\n", + "entry = f'python -c \"import base64; exec(base64.b64decode(\\'{b64}\\'))\"'\n", + "submit = requests.post(f\"{DASH_URL}/api/jobs/\", json={\"entrypoint\": entry, \"runtime_env\": {\"pip\": [\"fastapi>=0.110\"]}}, timeout=60).json()\n", + "job_id = submit[\"job_id\"]\n", + "print(\"Job:\", job_id)\n", + "\n", + "svc = \"http://ai-starter-kit-kuberay-head-svc:8000/smoke/healthz\"\n", + "for i in range(60):\n", + " s = requests.get(f\"{DASH_URL}/api/jobs/{job_id}\", timeout=10).json()[\"status\"]\n", + " try:\n", + " r = requests.get(svc, timeout=2)\n", + " print(f\"tick {i:02d}: job={s}, health={r.status_code}\")\n", + " if r.status_code == 200:\n", + " print(\"Smoke OK\")\n", + " break\n", + " except Exception as e:\n", + " print(f\"tick {i:02d}: job={s}, health=ERR {e}\")\n", + " time.sleep(1)" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Deploy model on Ray Serve with llama-cpp\n", + "\n", + "Packages and submits a Ray Job that spins up a Ray Serve app exposing /v1/healthz and /v1/chat/completions. It downloads the preferred GGUF from Hugging Face, initializes llama-cpp-python, logs to MLflow, and prints the deployed health/chat URLs." 
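+    "\n",
+    "Because the route mimics the OpenAI chat/completions schema, the stock `openai` client can talk to it once the job prints READY. A sketch (assumes `openai>=1.0` is installed in the notebook environment; the server does not validate the API key, but the client requires one):\n",
+    "\n",
+    "```python\n",
+    "from openai import OpenAI\n",
+    "\n",
+    "client = OpenAI(base_url=\"http://ai-starter-kit-kuberay-head-svc:8000/v1\", api_key=\"local\")\n",
+    "resp = client.chat.completions.create(\n",
+    "    model=\"qwen2.5-1.5b-instruct-gguf\",\n",
+    "    messages=[{\"role\": \"user\", \"content\": \"Say 'test ok'.\"}],\n",
+    "    max_tokens=16,\n",
+    ")\n",
+    "print(resp.choices[0].message.content)\n",
+    "```"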
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, requests\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "DASH_URL = f\"http://{HEAD}:8265\"\n", + "SERVE_PORT = 8000\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "runtime_env = {\n", + " \"pip\": [\n", + " \"fastapi==0.110.0\",\n", + " \"uvicorn==0.23.2\",\n", + " \"huggingface_hub==0.25.2\",\n", + " \"llama-cpp-python==0.3.16\", \n", + " \"hf_transfer==0.1.6\",\n", + " \"mlflow==2.14.3\", \n", + " ],\n", + " \"env_vars\": {\n", + " \"HF_HUB_ENABLE_HF_TRANSFER\": \"1\",\n", + " \"HUGGINGFACE_HUB_TOKEN\": os.environ.get(\"HUGGINGFACE_HUB_TOKEN\", \"\"),\n", + " \"SERVE_PORT\": str(SERVE_PORT),\n", + "\n", + " \"MODEL_REPO\": \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\",\n", + " \"GGUF_PREF_ORDER\": \"q4_k_m,q4_0,q3_k_m,q2_k\",\n", + "\n", + " \"LLM_CONTEXT\": os.environ.get(\"LLM_CONTEXT\", \"1024\"),\n", + " \"LLM_MAX_TOKENS\": os.environ.get(\"LLM_MAX_TOKENS\", \"256\"),\n", + " \"SERVER_MAX_NEW_TOKENS\": os.environ.get(\"SERVER_MAX_NEW_TOKENS\", \"512\"),\n", + "\n", + " \"LLM_THREADS\": os.environ.get(\"LLM_THREADS\", \"6\"),\n", + " \"OMP_NUM_THREADS\": os.environ.get(\"OMP_NUM_THREADS\", \"6\"),\n", + " \"GPU_LAYERS\": \"0\", \n", + " \n", + " \"PIP_PREFER_BINARY\": \"1\",\n", + " \"CMAKE_ARGS\": \"-DGGML_OPENMP=OFF -DLLAMA_NATIVE=OFF\",\n", + "\n", + " \"HF_HOME\": \"/tmp/hf-cache\",\n", + " \"TRANSFORMERS_CACHE\": \"/tmp/hf-cache\",\n", + "\n", + " \"MLFLOW_TRACKING_URI\": os.environ.get(\"MLFLOW_TRACKING_URI\", \"\"),\n", + " \"MLFLOW_EXPERIMENT_NAME\": os.environ.get(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\"),\n", + " },\n", + "}\n", + "\n", + "serve_py = textwrap.dedent(f\"\"\"\n", + "import os, time, multiprocessing, uuid\n", + "from typing import List, Dict, Any\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "from huggingface_hub import HfApi, hf_hub_download\n", + "from ray import serve\n", + "from llama_cpp import Llama\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " if os.getenv(\"MLFLOW_TRACKING_URI\"):\n", + " mlflow.set_tracking_uri(os.getenv(\"MLFLOW_TRACKING_URI\"))\n", + " mlflow.set_experiment(os.getenv(\"MLFLOW_EXPERIMENT_NAME\",\"ray-llama-cpp\"))\n", + " USE_MLFLOW = True\n", + "except Exception as _e:\n", + " USE_MLFLOW = False\n", + "\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"{SERVE_PORT}\"))\n", + "SERVE_ROUTE = \"{SERVE_ROUTE}\"\n", + "MODEL_REPO = os.getenv(\"MODEL_REPO\", \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\")\n", + "GGUF_PREFS = [s.strip() for s in os.getenv(\"GGUF_PREF_ORDER\",\"q4_k_m,q4_0,q3_k_m,q2_k\").split(\",\") if s.strip()]\n", + "CTX_LEN = int(os.getenv(\"LLM_CONTEXT\", \"2048\"))\n", + "MAX_TOKENS = int(os.getenv(\"LLM_MAX_TOKENS\", \"256\"))\n", + "HF_TOKEN = os.getenv(\"HUGGINGFACE_HUB_TOKEN\") or None\n", + "\n", + "serve.start(detached=True, http_options={{\"host\":\"0.0.0.0\", \"port\":SERVE_PORT}})\n", + "app = FastAPI()\n", + "\n", + "def pick_one_file(repo_id: str, prefs):\n", + " api = HfApi()\n", + " files = api.list_repo_files(repo_id=repo_id, repo_type=\"model\", token=HF_TOKEN)\n", + " ggufs = [f for f in files if f.lower().endswith(\".gguf\")]\n", + " if not ggufs:\n", + " raise RuntimeError(f\"No .gguf files visible in {{repo_id}}\")\n", + " for pref in prefs:\n", + " for f in ggufs:\n", + " if pref.lower() in 
f.lower():\n", + " return f\n", + " return ggufs[0]\n", + "\n", + "def pick_chat_format(repo: str, fname: str) -> str:\n", + " return \"qwen\"\n", + "\n", + "@serve.deployment(name=\"qwen\", num_replicas=1, ray_actor_options={{\"num_cpus\": 6}})\n", + "@serve.ingress(app)\n", + "class OpenAICompatLlama:\n", + " def __init__(self, repo_id: str = MODEL_REPO):\n", + " target = pick_one_file(repo_id, GGUF_PREFS)\n", + " print(f\"[env] model repo: {{repo_id}} file: {{target}}\", flush=True)\n", + " local_dir = \"/tmp/hf-gguf\"; os.makedirs(local_dir, exist_ok=True)\n", + "\n", + " gguf_path = hf_hub_download(\n", + " repo_id=repo_id, filename=target, token=HF_TOKEN,\n", + " local_dir=local_dir, local_dir_use_symlinks=False,\n", + " force_download=False, resume_download=True\n", + " )\n", + " print(f\"[download] done: {{gguf_path}}\", flush=True)\n", + "\n", + " n_threads = int(os.getenv(\"LLM_THREADS\", max(2, (multiprocessing.cpu_count() or 4)//2)))\n", + " print(f\"[load] llama-cpp-python | ctx={{CTX_LEN}} threads={{n_threads}} gpu_layers={{int(os.getenv('GPU_LAYERS','0'))}}\", flush=True)\n", + "\n", + " self.model_file = os.path.basename(gguf_path)\n", + " self.model_repo = repo_id\n", + " chat_format = pick_chat_format(self.model_repo, self.model_file)\n", + " print(f\"[load] chat_format={{chat_format}}\", flush=True)\n", + "\n", + " self.llm = Llama(\n", + " model_path=gguf_path,\n", + " n_ctx=CTX_LEN,\n", + " n_threads=n_threads,\n", + " n_batch=256, \n", + " n_gpu_layers=int(os.getenv(\"GPU_LAYERS\",\"0\")),\n", + " chat_format=chat_format,\n", + " verbose=False\n", + " )\n", + " print(\"[ready] model loaded\", flush=True)\n", + "\n", + " @app.get(\"/healthz\")\n", + " async def health(self):\n", + " return {{\"status\":\"ok\"}}\n", + "\n", + " @app.post(\"/chat/completions\")\n", + " async def chat_completions(self, request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + "\n", + " messages = body.get(\"messages\", [])\n", + " temperature = float(body.get(\"temperature\", 0.2))\n", + " req_max = body.get(\"max_tokens\", None)\n", + " stop_words = (body.get(\"stop\", []) or []) + [\"<|im_end|>\", \"\"]\n", + "\n", + " SERVER_MAX = int(os.getenv(\"SERVER_MAX_NEW_TOKENS\", \"512\"))\n", + " max_tokens = int(req_max if isinstance(req_max, int) else MAX_TOKENS)\n", + " max_tokens = max(32, min(max_tokens, CTX_LEN - 128, SERVER_MAX))\n", + "\n", + " rid = \"chatcmpl-\" + uuid.uuid4().hex[:24]\n", + " created = int(time.time())\n", + " model_name = f\"{{self.model_repo}}/{{self.model_file}}\"\n", + "\n", + " try:\n", + " result = self.llm.create_chat_completion(\n", + " messages=messages,\n", + " temperature=temperature,\n", + " max_tokens=max_tokens,\n", + " top_k=50,\n", + " top_p=0.9,\n", + " repeat_penalty=1.1,\n", + " stop=stop_words,\n", + " )\n", + " out_text = (result[\"choices\"][0][\"message\"][\"content\"] or \"\").strip()\n", + " usage_raw = result.get(\"usage\") or {{}}\n", + " p_tokens = int(usage_raw.get(\"prompt_tokens\") or 0)\n", + " c_tokens = int(usage_raw.get(\"completion_tokens\") or 0)\n", + " err = None\n", + " except Exception as e:\n", + " out_text = \"\"\n", + " p_tokens = c_tokens = 0\n", + " err = str(e)\n", + "\n", + " if USE_MLFLOW:\n", + " try:\n", + " dur_ms = int((time.time()-t0) * 1000)\n", + " with mlflow.start_run(run_name=\"chat\"):\n", + " mlflow.set_tags({{\n", + " \"model_repo\": self.model_repo,\n", + " \"model_file\": self.model_file,\n", + " \"framework\": \"llama-cpp-python\",\n", + " }})\n", + " 
mlflow.log_params({{\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"ctx\": CTX_LEN,\n", + " }})\n", + " if not (p_tokens and c_tokens):\n", + " p_tokens = p_tokens or max(1, len(\" \".join(m.get(\"content\",\"\") for m in messages).split()))\n", + " c_tokens = c_tokens or max(0, len(out_text.split()))\n", + " mlflow.log_metrics({{\n", + " \"duration_ms\": dur_ms,\n", + " \"prompt_tokens_approx\": p_tokens,\n", + " \"completion_tokens_approx\": c_tokens,\n", + " \"total_tokens_approx\": p_tokens + c_tokens,\n", + " }})\n", + " except Exception:\n", + " pass\n", + "\n", + " if err:\n", + " return JSONResponse(status_code=500, content={{\"error\": err, \"type\":\"generation_error\"}})\n", + "\n", + " usage = {{\n", + " \"prompt_tokens\": p_tokens,\n", + " \"completion_tokens\": c_tokens,\n", + " \"total_tokens\": p_tokens + c_tokens,\n", + " }}\n", + " return {{\n", + " \"id\": rid,\n", + " \"object\": \"chat.completion\",\n", + " \"created\": created,\n", + " \"model\": model_name,\n", + " \"choices\": [\n", + " {{\n", + " \"index\": 0,\n", + " \"message\": {{\"role\":\"assistant\",\"content\": out_text}},\n", + " \"finish_reason\": \"stop\"\n", + " }}\n", + " ],\n", + " \"usage\": usage\n", + " }}\n", + "\n", + "serve.run(OpenAICompatLlama.bind(), route_prefix=SERVE_ROUTE)\n", + "print(\"READY\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "payload = base64.b64encode(serve_py.encode()).decode()\n", + "entrypoint = 'python -c \"import base64,sys;exec(base64.b64decode(\\'{}\\').decode())\"'.format(payload)\n", + "\n", + "job = requests.post(\n", + " f\"{DASH_URL}/api/jobs/\",\n", + " json={\n", + " \"entrypoint\": entrypoint,\n", + " \"runtime_env\": runtime_env,\n", + " \"metadata\": {\"job_name\": \"serve-qwen2_5-llama_cpp-openai\"},\n", + " },\n", + " timeout=45\n", + ").json()\n", + "\n", + "print(\"Job:\", job.get(\"job_id\"))\n", + "print(\"Health:\", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/healthz\")\n", + "print(\"Chat: \", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/chat/completions\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Calls /v1/healthz and then sends an OpenAI-style chat request to /v1/chat/completions with a short prompt. Prints latency and token usage, returning the assistant text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", + "\n", + "def health():\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220, stop=None):\n", + " body = {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"temperature\": float(temperature),\n", + " \"max_tokens\": int(max_tokens),\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are Qwen2.5 Instruct running on a tiny CPU host. 
Be concise and use complete sentences.\"},\n",
+    "            {\"role\": \"user\", \"content\": prompt},\n",
+    "        ],\n",
+    "    }\n",
+    "    if stop:\n",
+    "        body[\"stop\"] = stop\n",
+    "\n",
+    "    t0 = time.time()\n",
+    "    r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=300)\n",
+    "    dt = time.time() - t0\n",
+    "    r.raise_for_status()\n",
+    "    out = r.json()[\"choices\"][0][\"message\"][\"content\"]\n",
+    "    usage = r.json().get(\"usage\", {})\n",
+    "    print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n",
+    "    print(\"\\n---\\n\", out)\n",
+    "    return out\n",
+    "\n",
+    "health()\n",
+    "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\", stop=[\"<|im_end|>\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "553d2756-8949-43e3-8342-71387688e0fa",
+   "metadata": {},
+   "source": [
+    "### Cell 5 - Multi-agent (Autogen) pipeline\n",
+    "\n",
+    "Installs Autogen, configures OpenAIWrapper to hit the Ray Serve /v1 endpoint, warms up the model, then runs a simple three-agent workflow (Researcher -> Writer -> Critic) to produce and refine a short report."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip -q install pyautogen~=0.2.35 \"flaml[automl]\" --disable-pip-version-check\n",
+    "\n",
+    "import os, sys\n",
+    "\n",
+    "for p in [\n",
+    "    \"/tmp/models-cache/lib/python3.11/site-packages\",\n",
+    "    os.path.expanduser(\"~/.local/lib/python3.11/site-packages\"),\n",
+    "]:\n",
+    "    if os.path.isdir(p) and p not in sys.path:\n",
+    "        sys.path.insert(0, p)\n",
+    "\n",
+    "import autogen\n",
+    "from autogen import AssistantAgent, UserProxyAgent\n",
+    "\n",
+    "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n",
+    "SERVE_PORT = 8000\n",
+    "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n",
+    "\n",
+    "config_list = [\n",
+    "    {\n",
+    "        \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n",
+    "        \"base_url\": BASE_URL,\n",
+    "        \"api_key\": \"local\",\n",
+    "        \"price\": [0.0, 0.0],\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "llm = autogen.OpenAIWrapper(config_list=config_list)\n",
+    "try:\n",
+    "    r = llm.create(messages=[{\"role\":\"user\",\"content\":\"Say 'test ok'.\"}], temperature=0.2, max_tokens=16)\n",
+    "    print(\"Warmup:\", r.choices[0].message.content)\n",
+    "except Exception as e:\n",
+    "    print(\"Warmup failed:\", e)\n",
+    "\n",
+    "user_proxy = UserProxyAgent(\n",
+    "    name=\"UserProxy\",\n",
+    "    system_message=\"You are the human admin. Initiate the task.\",\n",
+    "    code_execution_config=False,\n",
+    "    human_input_mode=\"NEVER\",\n",
+    ")\n",
+    "\n",
+    "researcher = AssistantAgent(\n",
+    "    name=\"Researcher\",\n",
+    "    system_message=(\n",
+    "        \"You are a researcher. Gather concise, verified facts on the topic. \"\n",
+    "        \"Return several bullet points with inline source domains (e.g., nature.com, ibm.com). \"\n",
+    "        \"Keep under 100 words total. No made-up sources. \"\n",
+    "        \"Do not include any special end token.\"\n",
+    "    ),\n",
+    "    llm_config={\"config_list\": config_list, \"temperature\": 0.35, \"max_tokens\": 140, \"timeout\": 300},\n",
+    ")\n",
+    "\n",
+    "writer = AssistantAgent(\n",
+    "    name=\"Writer\",\n",
+    "    system_message=(\n",
+    "        \"You are a writer. Using the Researcher’s notes, produce a clear report under 160 words. \"\n",
+    "        \"Avoid speculation. Keep it structured and readable. 
\"\n", + " \"Do not include any special end token.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.55, \"max_tokens\": 220, \"timeout\": 180},\n", + ")\n", + "\n", + "critic = AssistantAgent(\n", + " name=\"Critic\",\n", + " system_message=(\n", + " \"You are a critic. Review the Writer’s report for accuracy, clarity, and flow.\"\n", + " \"Present the tightened final text and keep it under 140 words. On a new last line output exactly: <|END|>\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.45, \"max_tokens\": 160, \"timeout\": 300},\n", + ")\n", + "\n", + "def run_sequential(task):\n", + " research_response = researcher.generate_reply(messages=[{\"content\": task, \"role\": \"user\"}])\n", + " research_notes = research_response if isinstance(research_response, str) else research_response.get(\"content\", \"[no output]\")\n", + " print(\"\\nResearch Notes:\\n\", research_notes)\n", + "\n", + " writer_prompt = f\"Using these research notes, write the report:\\n{research_notes}\"\n", + " writer_response = writer.generate_reply(messages=[{\"content\": writer_prompt, \"role\": \"user\"}])\n", + " report = writer_response if isinstance(writer_response, str) else writer_response.get(\"content\", \"[no output]\")\n", + " print(\"\\nDraft Report:\\n\", report)\n", + "\n", + " critic_prompt = f\"Review this report:\\n{report}\"\n", + " critic_response = critic.generate_reply(messages=[{\"content\": critic_prompt, \"role\": \"user\"}])\n", + " final_text = critic_response if isinstance(critic_response, str) else critic_response.get(\"content\", \"[no output]\")\n", + " print(\"\\nFinal Review:\\n\", final_text)\n", + " return final_text\n", + "\n", + "task = \"Research the latest advancements in quantum computing as of 2025. Gather key facts, then write a short report (200–300 words). Have the Critic review and finalize.\"\n", + "final_output = run_sequential(task)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent chat runs\n", + "\n", + "Installs MLflow, sets the tracking URI and experiment, then queries and prints the latest runs with key params/metrics (temperature, max_tokens, duration) to verify Serve logging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = os.getenv(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\")\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id, \n", + " order_by=[\"attributes.start_time DESC\"], \n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From ad4e0affcdd28c12236c271e1bd36b7827273768 Mon Sep 17 00:00:00 2001 From: ArthurKamalov Date: Mon, 1 Sep 2025 19:10:07 +0200 Subject: [PATCH 2/2] Add ollama (#10) * add ollama to ai starter kit * add ollama * add image pulling for ollama * add simple chat bot * add ollama persistence * keep only ollama model pull * add automatic configmaps generation for files * small fixes in values.yaml * remove postStart hook * enable everything --------- Co-authored-by: Nikita Aleksandrov --- .../helm-chart/ai-starter-kit/Chart.yaml | 6 + .../ai-starter-kit/files/chat_bot.ipynb | 312 ++++++++++++++++++ .../ai-starter-kit/files/requirements.txt | 2 + .../ai-starter-kit/templates/configmaps.yaml | 50 +-- .../helm-chart/ai-starter-kit/values-gke.yaml | 26 +- .../ai-starter-kit/values-minikube.yaml | 26 +- .../helm-chart/ai-starter-kit/values.yaml | 36 +- ai/ai-starter-kit/notebooks/test_ollama.py | 11 + 8 files changed, 396 insertions(+), 73 deletions(-) create mode 100644 ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb create mode 100644 ai/ai-starter-kit/notebooks/test_ollama.py diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml index 6dd55f35..382e4510 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml @@ -28,8 +28,10 @@ dependencies: - name: kuberay-operator version: "1.3.0" repository: "https://ray-project.github.io/kuberay-helm" + 
condition: ray-cluster.enabled
 - name: ray-cluster
   version: "1.3.0"
+  condition: ray-cluster.enabled
   repository: "https://ray-project.github.io/kuberay-helm"
 - name: jupyterhub
   version: "9.0.16"
@@ -37,3 +39,7 @@ dependencies:
 - name: mlflow
   version: "0.12.0"
   repository: "https://community-charts.github.io/helm-charts"
+ - name: ollama
+   condition: ollama.enabled
+   version: "1.27.0"
+   repository: "https://helm.otwld.com"
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb
new file mode 100644
index 00000000..0834cf6c
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb
@@ -0,0 +1,312 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9e3dd59-b4d9-4de5-a6aa-a72d1480ac77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ollama import Client\n",
+    "\n",
+    "client = Client(\n",
+    "    host='http://ai-starter-kit-ollama:11434',\n",
+    "    headers={'x-some-header': 'some-value'}\n",
+    ")\n",
+    "\n",
+    "def get_response(prompt):\n",
+    "    response = client.chat(model='gemma3', messages=[\n",
+    "        {\n",
+    "            'role': 'user',\n",
+    "            'content': prompt,\n",
+    "        },\n",
+    "    ])\n",
+    "    return response.message.content"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "dd1513d4-18c5-46d7-8260-f90be004d315",
+   "metadata": {},
+   "outputs": [
+    [trimmed saved output: BokehJS 3.7.3 / Panel 1.7.5 loader JavaScript, PyViz comm-manager boilerplate, and empty placeholder <div>s from the rendered dashboard (holoviews_exec ids b6fd14e0-f8d2-46e7-9c4d-722893d04d7e and 2854d6b0-689d-4dc0-8861-1834489708e9); only the cell's final text/plain execute_result is kept below]
+    {
+     "data": {
+      "text/plain": [
+       "Column\n",
+       "    [0] TextInput(placeholder='Enter text here…')\n",
+       "    [1] Row\n",
+       "        [0] Button(name='Chat!')\n",
+       "    [2] ParamFunction(function, _pane=Column, defer_load=False, height=300, loading_indicator=True, sizing_mode='fixed', width=300)"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {
+      "application/vnd.holoviews_exec.v0+json": {
+       "id": "2854d6b0-689d-4dc0-8861-1834489708e9"
+      }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import panel as pn  # GUI\n",
+    "pn.extension()\n",
+    "\n",
+    "panels = []  # collect display\n",
+    "context = []  # accumulate messages\n",
+    "\n",
+    "\n",
+    "def collect_messages(_):\n",
+    "    prompt = inp.value_input\n",
+    "    inp.value = ''\n",
+    "    if (not prompt):\n",
+    "        return pn.Column(*panels)\n",
+    "\n",
+    "    response = get_response(prompt)\n",
+    "    context.append({'role':'user', 'content':f\"{prompt}\"})\n",
+    "    context.append({'role':'assistant', 'content':f\"{response}\"})\n",
+    "    panels.append(\n",
+    "        pn.Row('User:', pn.pane.Markdown(prompt, width=600)))\n",
+    "    panels.append(\n",
+    "        pn.Row('Assistant:', pn.pane.Markdown(response, width=600)))\n",
+    "\n",
+    "    return pn.Column(*panels)\n",
+    "\n",
+    "\n",
+    "inp = pn.widgets.TextInput(value=\"Hi\", placeholder='Enter text here…')\n",
+    "button_conversation = pn.widgets.Button(name=\"Chat!\")\n",
+    "interactive_conversation = pn.bind(collect_messages, button_conversation)\n",
+    "dashboard = pn.Column(\n",
+    "    inp,\n",
+    "    pn.Row(button_conversation),\n",
+    "    pn.panel(interactive_conversation, loading_indicator=True, height=300, width=300),\n",
+    ")\n",
+    "\n",
+    "dashboard"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
index 48fe31da..ac30b2a5 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt
@@ -4,3 +4,5 @@ huggingface_hub
 numpy
 ipywidgets
 mlflow
+ollama
+panel
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
index 33d0dfdd..e73e3d7a 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml
@@ -3,51 +3,17 @@
 Create ConfigMaps for jupyterhub singleuser pods.
 These ConfigMaps are mounted as volumes.
 */ -}}
+{{- range $path, $_ := .Files.Glob "files/*" }}
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: {{ .Release.Name }}-requirements-txt
+  name: {{ printf "%s-%s" $.Release.Name (base $path | replace "." 
"-" | replace "_" "-") }} labels: - app.kubernetes.io/managed-by: {{ .Release.Service | quote }} - app.kubernetes.io/instance: {{ .Release.Name | quote }} - helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} + app.kubernetes.io/instance: {{ $.Release.Name | quote }} + helm.sh/chart: "{{ $.Chart.Name }}-{{ $.Chart.Version }}" data: - requirements.txt: |- -{{ .Files.Get "files/requirements.txt" | nindent 4 }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Release.Name }}-hf-download-script - labels: - app.kubernetes.io/managed-by: {{ .Release.Service | quote }} - app.kubernetes.io/instance: {{ .Release.Name | quote }} - helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" -data: - download_models.py: |- -{{ .Files.Get "files/download_models.py" | nindent 4 }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Release.Name }}-welcome-notebook - labels: - app.kubernetes.io/managed-by: {{ .Release.Service | quote }} - app.kubernetes.io/instance: {{ .Release.Name | quote }} - helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" -data: - welcome.ipynb: |- -{{ .Files.Get "files/welcome.ipynb" | nindent 4 }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Release.Name }}-ray-notebook - labels: - app.kubernetes.io/managed-by: {{ .Release.Service | quote }} - app.kubernetes.io/instance: {{ .Release.Name | quote }} - helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" -data: - ray.ipynb: |- -{{ .Files.Get "files/ray.ipynb" | nindent 4 }} + {{ base $path | quote }}: |- +{{ $.Files.Get $path | nindent 4 }} +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml index a641cc45..4090f791 100644 --- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-gke.yaml @@ -9,8 +9,6 @@ jupyterhub: singleuser: fsGid: 100 defaultUrl: "/lab/tree/welcome.ipynb" - lifecycleHooks: - postStart: null image: tag: "5.0.0-debian-12-r2" initContainers: @@ -70,15 +68,18 @@ jupyterhub: # - name: models-cache # persistentVolumeClaim: # claimName: "{{ .Release.Name }}-models-cache-pvc" - - name: hf-download-script + - name: download-models-py configMap: - name: "{{ .Release.Name }}-hf-download-script" - - name: welcome-notebook + name: "{{ .Release.Name }}-download-models-py" + - name: welcome-ipynb configMap: - name: "{{ .Release.Name }}-welcome-notebook" - - name: ray-notebook + name: "{{ .Release.Name }}-welcome-ipynb" + - name: ray-ipynb configMap: - name: "{{ .Release.Name }}-ray-notebook" + name: "{{ .Release.Name }}-ray-ipynb" + - name: chat-bot-ipynb + configMap: + name: "{{ .Release.Name }}-chat-bot-ipynb" - name: hf-token-secret secret: secretName: "{{ .Release.Name }}-hf-token-secret" @@ -89,15 +90,18 @@ jupyterhub: subPath: requirements.txt # - name: models-cache # mountPath: /tmp/models-cache - - name: hf-download-script + - name: download-models-py mountPath: /tmp/download_models.py subPath: download_models.py - - name: welcome-notebook + - name: welcome-ipynb mountPath: /tmp/welcome.ipynb subPath: welcome.ipynb - - name: ray-notebook + - name: ray-ipynb mountPath: /tmp/ray.ipynb subPath: ray.ipynb + - name: chat-bot-ipynb + mountPath: /tmp/chat_bot.ipynb + subPath: chat_bot.ipynb - name: hf-token-secret mountPath: "/etc/secrets/huggingface" readOnly: true diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-minikube.yaml 
b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-minikube.yaml
index c36ed30d..ba409f59 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-minikube.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values-minikube.yaml
@@ -37,7 +37,7 @@ jupyterhub:
         readOnly: true
       - name: models-cache
         mountPath: /tmp/models-cache
-      - name: hf-download-script
+      - name: download-models-py
         mountPath: /tmp/download_models.py
         subPath: download_models.py
         readOnly: true
@@ -51,15 +51,18 @@ jupyterhub:
       - name: models-cache
        persistentVolumeClaim:
          claimName: "{{ .Release.Name }}-models-cache-pvc"
-      - name: hf-download-script
+      - name: download-models-py
        configMap:
-         name: "{{ .Release.Name }}-hf-download-script"
-      - name: welcome-notebook
+         name: "{{ .Release.Name }}-download-models-py"
+      - name: welcome-ipynb
        configMap:
-         name: "{{ .Release.Name }}-welcome-notebook"
-      - name: ray-notebook
+         name: "{{ .Release.Name }}-welcome-ipynb"
+      - name: ray-ipynb
        configMap:
-         name: "{{ .Release.Name }}-ray-notebook"
+         name: "{{ .Release.Name }}-ray-ipynb"
+      - name: chat-bot-ipynb
+       configMap:
+         name: "{{ .Release.Name }}-chat-bot-ipynb"
       - name: hf-token-secret
         secret:
           secretName: "{{ .Release.Name }}-hf-token-secret"
@@ -70,15 +73,18 @@ jupyterhub:
       - name: requirements-txt
         mountPath: /tmp/requirements.txt
         subPath: requirements.txt
       - name: models-cache
         mountPath: /tmp/models-cache
-      - name: hf-download-script
+      - name: download-models-py
         mountPath: /tmp/download_models.py
         subPath: download_models.py
-      - name: welcome-notebook
+      - name: welcome-ipynb
         mountPath: /tmp/welcome.ipynb
         subPath: welcome.ipynb
-      - name: ray-notebook
+      - name: ray-ipynb
         mountPath: /tmp/ray.ipynb
         subPath: ray.ipynb
+      - name: chat-bot-ipynb
+        mountPath: /tmp/chat_bot.ipynb
+        subPath: chat_bot.ipynb
       - name: hf-token-secret
         mountPath: "/etc/secrets/huggingface"
         readOnly: true
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
index b093d9f1..8f1147f5 100644
--- a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml
@@ -9,8 +9,6 @@ jupyterhub:
   singleuser:
     fsGid: 100
     defaultUrl: "/lab/tree/welcome.ipynb"
-    lifecycleHooks:
-      postStart: null
     image:
       tag: "5.0.0-debian-12-r2"
     initContainers:
@@ -64,15 +62,18 @@ jupyterhub:
       - name: requirements-txt
        configMap:
          name: "{{ .Release.Name }}-requirements-txt"
-      - name: hf-download-script
+      - name: download-models-py
        configMap:
-         name: "{{ .Release.Name }}-hf-download-script"
-      - name: welcome-notebook
+         name: "{{ .Release.Name }}-download-models-py"
+      - name: welcome-ipynb
        configMap:
-         name: "{{ .Release.Name }}-welcome-notebook"
-      - name: ray-notebook
+         name: "{{ .Release.Name }}-welcome-ipynb"
+      - name: ray-ipynb
        configMap:
-         name: "{{ .Release.Name }}-ray-notebook"
+         name: "{{ .Release.Name }}-ray-ipynb"
+      - name: chat-bot-ipynb
+       configMap:
+         name: "{{ .Release.Name }}-chat-bot-ipynb"
       - name: hf-token-secret
         secret:
           secretName: "{{ .Release.Name }}-hf-token-secret"
@@ -81,15 +82,18 @@ jupyterhub:
       - name: requirements-txt
         mountPath: /tmp/requirements.txt
         subPath: requirements.txt
-      - name: hf-download-script
+      - name: download-models-py
         mountPath: /tmp/download_models.py
         subPath: download_models.py
-      - name: welcome-notebook
+      - name: welcome-ipynb
         mountPath: /tmp/welcome.ipynb
         subPath: welcome.ipynb
-      - name: ray-notebook
+      - name: ray-ipynb
         mountPath: /tmp/ray.ipynb
         subPath: ray.ipynb
+      - name: chat-bot-ipynb
+        mountPath: /tmp/chat_bot.ipynb
+        subPath: chat_bot.ipynb
       - name: hf-token-secret
         mountPath: "/etc/secrets/huggingface"
         readOnly: true
@@ -172,6 
+176,18 @@ localPersistence: # This path must match the destination path inside the minikube node. hostPath: "/tmp/models-cache" +ollama: + enabled: true + ollama: + models: + pull: + - gemma3 + persistentVolume: + enabled: true + existingClaim: "ai-starter-kit-models-cache-pvc" + subPath: "ollama" + + ramalama: enabled: true command: ["sh", "-c" , "trap 'exit 0' TERM; while true; do sleep 60 & wait; done"] diff --git a/ai/ai-starter-kit/notebooks/test_ollama.py b/ai/ai-starter-kit/notebooks/test_ollama.py new file mode 100644 index 00000000..58cf22e0 --- /dev/null +++ b/ai/ai-starter-kit/notebooks/test_ollama.py @@ -0,0 +1,11 @@ +from ollama import Client +client = Client( + host='http://ai-starter-kit-ollama:11434', + headers={'x-some-header': 'some-value'} +) +response = client.chat(model='gemma3', messages=[ + { + 'role': 'user', + 'content': 'Why is the sky blue?', + }, +])
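The new test script stores the reply but never surfaces it; when running it by hand, one extra line (using the same `response` object from test_ollama.py) prints the model output:

```python
# Print the assistant reply returned by client.chat(...) above.
print(response.message.content)
```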