From 4dce1c03005ad3a12a2a5013558e90c0a651f3af Mon Sep 17 00:00:00 2001 From: Yaron Schneider Date: Wed, 30 Apr 2025 08:14:43 -0700 Subject: [PATCH] Add quickstart for a knowledge base agent over Postgres + MCP + Chainlit (#103) * initial commit Signed-off-by: yaron2 * add quickstart for a postgres agent with mcp Signed-off-by: yaron2 * linter Signed-off-by: yaron2 * linter Signed-off-by: yaron2 * review feedback Signed-off-by: yaron2 * changed docker instructions Signed-off-by: yaron2 * Update README.md Signed-off-by: Yaron Schneider --------- Signed-off-by: yaron2 Signed-off-by: Yaron Schneider --- .../08-data-agent-mcp-chainlit/.gitignore | 97 +++++++++++ .../08-data-agent-mcp-chainlit/README.md | 163 ++++++++++++++++++ quickstarts/08-data-agent-mcp-chainlit/app.py | 83 +++++++++ .../components/conversationmemory.yaml | 12 ++ .../08-data-agent-mcp-chainlit/get_schema.py | 55 ++++++ .../requirements.txt | 5 + .../08-data-agent-mcp-chainlit/schema.sql | 10 ++ .../08-data-agent-mcp-chainlit/users.sql | 11 ++ 8 files changed, 436 insertions(+) create mode 100644 quickstarts/08-data-agent-mcp-chainlit/.gitignore create mode 100644 quickstarts/08-data-agent-mcp-chainlit/README.md create mode 100644 quickstarts/08-data-agent-mcp-chainlit/app.py create mode 100644 quickstarts/08-data-agent-mcp-chainlit/components/conversationmemory.yaml create mode 100644 quickstarts/08-data-agent-mcp-chainlit/get_schema.py create mode 100644 quickstarts/08-data-agent-mcp-chainlit/requirements.txt create mode 100644 quickstarts/08-data-agent-mcp-chainlit/schema.sql create mode 100644 quickstarts/08-data-agent-mcp-chainlit/users.sql diff --git a/quickstarts/08-data-agent-mcp-chainlit/.gitignore b/quickstarts/08-data-agent-mcp-chainlit/.gitignore new file mode 100644 index 0000000..bc33034 --- /dev/null +++ b/quickstarts/08-data-agent-mcp-chainlit/.gitignore @@ -0,0 +1,97 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Virtual 
environments +.venv/ +venv/ +ENV/ +env/ +.pipenv/ +*.egg-info/ +.eggs/ + +# Distribution / packaging +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover + +# Pytest +.pytest_cache/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# VS Code +.vscode/ + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Pylint +.pylint.d/ + +# IDEs and editors +.idea/ +*.sublime-workspace +*.sublime-project +*.vscode/ + +# MacOS +.DS_Store + +# Logs +*.log +.files/ + +# Local environment variables +.env +.env.* + +# Docker +*.pid + +# Chainlit +.chainlit/ +chainlit.md diff --git a/quickstarts/08-data-agent-mcp-chainlit/README.md b/quickstarts/08-data-agent-mcp-chainlit/README.md new file mode 100644 index 0000000..242fb41 --- /dev/null +++ b/quickstarts/08-data-agent-mcp-chainlit/README.md @@ -0,0 +1,163 @@ +# A conversational agent over a Postgres database using MCP + +This quickstart demonstrates how to build a fully functional, enterprise-ready agent that allows users to ask their database any question in natural text and get both the results and a highly structured analysis of complex questions. This quickstart also shows the usage of MCP in Dapr Agents to connect to the database and provides a fully functional ChatGPT-like chat interface using Chainlit. 
+ +## Key Benefits + +- **Conversational Knowledge Base**: Users can talk to their database in natural language, ask complex questions and perform advanced analysis over data +- **Conversational Memory**: The agent maintains context across interactions in the user's [database of choice](https://docs.dapr.io/reference/components-reference/supported-state-stores/) +- **UI Interface**: Use an out-of-the-box, LLM-ready chat interface using [Chainlit](https://github.com/Chainlit/chainlit) +- **Boilerplate-Free DB Layer**: MCP allows the Dapr Agent to connect to the database without requiring users to write Postgres-specific code + +## Prerequisites + +- Python 3.10 (recommended) +- pip package manager +- OpenAI API key (for the OpenAI example) +- [Dapr CLI installed](https://docs.dapr.io/getting-started/install-dapr-cli/) + +## Environment Setup + +```bash +# Create a virtual environment +python3.10 -m venv .venv + +# Activate the virtual environment +# On Windows: +.venv\Scripts\activate +# On macOS/Linux: +source .venv/bin/activate + +# Install dependencies +pip install -r requirements.txt + +# Initialize Dapr +dapr init +``` + +## LLM Configuration + +For this example, we'll be using the OpenAI client that is used by default. To target different LLMs, see [this example](../02_llm_call_dapr/README.md). + +Create a `.env` file in the project root: + +```env +OPENAI_API_KEY=your_api_key_here +``` + +Replace `your_api_key_here` with your actual OpenAI API key. + +## Postgres Configuration + +### Connect to an existing database + +Create an `.env` file in the root directory of this quickstart and insert your database configuration: + +```bash +DB_HOST= +DB_PORT= +DB_NAME= +DB_USER= +DB_PASSWORD= +``` + +### Create a new sample database + +First, install Postgres on your machine. 
+ +#### Option 1: Using Docker + +Create the following directory and copy the sql files there: + +```bash +mkdir docker-entrypoint-initdb.d +cp schema.sql users.sql ./docker-entrypoint-initdb.d +``` + +Run the database container: + +```bash +docker run --rm --name sampledb \ + -e POSTGRES_PASSWORD=mypassword \ + -e POSTGRES_USER=admin \ + -e POSTGRES_DB=userdb \ + -p 5432:5432 \ + -v $(pwd)/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d \ + -d postgres +``` + +#### Option 2: Using Brew + +Install Postgres: + +```bash +brew install postgresql +brew services start postgresql + +psql postgres +> CREATE USER admin WITH PASSWORD 'mypassword'; +> CREATE DATABASE userdb; +> GRANT ALL PRIVILEGES ON DATABASE userdb TO admin; +``` + +Next, create the users table and seed data: + +```bash +psql -h localhost -U admin -d userdb -f schema.sql +psql -h localhost -U admin -d userdb -f users.sql +``` + +#### Create .env file + +Finally, create an `.env` file in the root directory of this quickstart and insert your database configuration: + +```bash +DB_HOST=localhost +DB_PORT=5432 +DB_NAME=userdb +DB_USER=admin +DB_PASSWORD=mypassword +``` + +## MCP Configuration + +To get the Dapr Agent to connect to our Postgres database, we'll use a Postgres MCP server. +Change the settings below based on your Postgres configuration: + +*Note: If you're running Postgres in a Docker container, change `<host>` to `localhost`.* + +```bash +docker run -p 8000:8000 \ + -e DATABASE_URI=postgresql://<username>:<password>@<host>:5432/userdb \ + crystaldba/postgres-mcp --access-mode=unrestricted --transport=sse +``` + +## Examples + +### Load data to Postgres and create a knowledge base chat interface + +Run the agent: + +```bash +dapr run --app-id sql --resources-path ./components -- chainlit run app.py -w --port 8001 +``` + +Wait until the browser opens up. Once open, you're ready to talk to your Postgres database! +You can find the agent page at http://localhost:8001. 
+ +### Ask Questions + +Now you can start talking to your data. If using the sample database, ask questions like `Show me all churned users from the past month` and `Can you identify the problematic area in our product that led to users churning?`. + +#### Testing the agent's memory + +If you exit the app and restart it, the agent will remember all the previous conversations. When you install Dapr using `dapr init`, Redis is installed by default and this is where the conversation memory is saved. To change it, edit the `./components/conversationmemory.yaml` file. + +## Summary + +**How It Works:** +1. The MCP server is running and connects to our Postgres database +2. Dapr starts, loading the conversation history storage configs from the `components` folder. The agent connects to the MCP server. +3. Chainlit loads and starts the agent UI in your browser. +4. Users can now talk to their database in natural language and have the agent analyze the data. +5. The conversation history is automatically managed by Dapr and saved in the state store configured in `./components/conversationmemory.yaml`. diff --git a/quickstarts/08-data-agent-mcp-chainlit/app.py b/quickstarts/08-data-agent-mcp-chainlit/app.py new file mode 100644 index 0000000..9a16f50 --- /dev/null +++ b/quickstarts/08-data-agent-mcp-chainlit/app.py @@ -0,0 +1,83 @@ +import chainlit as cl +from dapr_agents import Agent +from dapr_agents.tool.mcp.client import MCPClient +from dotenv import load_dotenv +from get_schema import get_table_schema_as_dict + +load_dotenv() + +instructions = [ + "You are an assistant designed to translate human readable text to postgresql queries. " + "Your primary goal is to provide accurate SQL queries based on the user request. " + "If something is unclear or you need more context, ask thoughtful clarifying questions." 
+] + +agent = {} + +table_info = {} + + +@cl.on_chat_start +async def start(): + client = MCPClient() + await client.connect_sse( + server_name="local", # Unique name you assign to this server + url="http://0.0.0.0:8000/sse", # MCP SSE endpoint + headers=None, # Optional HTTP headers if needed + ) + + # See what tools were loaded + tools = client.get_all_tools() + + global agent + agent = Agent( + name="SQL", + role="Database Expert", + instructions=instructions, + tools=tools, + ) + + global table_info + table_info = get_table_schema_as_dict() + + if table_info: + await cl.Message( + content="Database connection successful. Ask me anything." + ).send() + else: + await cl.Message(content="Database connection failed.").send() + + +@cl.on_message +async def main(message: cl.Message): + # generate the result set and pass back to the user + prompt = create_prompt_for_llm(table_info, message.content) + result = await agent.run(prompt) + + await cl.Message( + content=result, + ).send() + + result_set = await agent.run( + "Execute the following sql query and always return a table format unless instructed otherwise. If the user asks a question regarding the data, return the result and formalize an answer based on inspecting the data: " + + result + ) + await cl.Message( + content=result_set, + ).send() + + +def create_prompt_for_llm(schema_data, user_question): + prompt = "Here is the schema for the tables in the database:\n\n" + + # Add schema information to the prompt + for table, columns in schema_data.items(): + prompt += f"Table {table}:\n" + for col in columns: + prompt += f" - {col['column_name']} ({col['data_type']}), Nullable: {col['is_nullable']}, Default: {col['column_default']}\n" + + # Add the user's question for context + prompt += f"\nUser's question: {user_question}\n" + prompt += "Generate the postgres SQL query to answer the user's question. Return only the query string and nothing else." 
+ + return prompt diff --git a/quickstarts/08-data-agent-mcp-chainlit/components/conversationmemory.yaml b/quickstarts/08-data-agent-mcp-chainlit/components/conversationmemory.yaml new file mode 100644 index 0000000..d5afd64 --- /dev/null +++ b/quickstarts/08-data-agent-mcp-chainlit/components/conversationmemory.yaml @@ -0,0 +1,12 @@ +apiVersion: dapr.io/v1alpha1 +kind: Component +metadata: + name: conversationstore +spec: + type: state.redis + version: v1 + metadata: + - name: redisHost + value: localhost:6379 + - name: redisPassword + value: "" diff --git a/quickstarts/08-data-agent-mcp-chainlit/get_schema.py b/quickstarts/08-data-agent-mcp-chainlit/get_schema.py new file mode 100644 index 0000000..bc5ce1a --- /dev/null +++ b/quickstarts/08-data-agent-mcp-chainlit/get_schema.py @@ -0,0 +1,55 @@ +import os +import psycopg + + +def get_table_schema_as_dict(): + conn_params = { + "host": os.getenv("DB_HOST"), + "port": os.getenv("DB_PORT"), + "dbname": os.getenv("DB_NAME"), + "user": os.getenv("DB_USER"), + "password": os.getenv("DB_PASSWORD"), + } + + schema_data = {} + + try: + with psycopg.connect(**conn_params) as conn: + with conn.cursor() as cur: + cur.execute( + """ + SELECT table_schema, table_name + FROM information_schema.tables + WHERE table_type = 'BASE TABLE' AND table_schema NOT IN ('pg_catalog', 'information_schema') + ORDER BY table_schema, table_name; + """ + ) + tables = cur.fetchall() + + for schema, table in tables: + schema_data[f"{schema}.{table}"] = [] + cur.execute( + """ + SELECT column_name, data_type, is_nullable, column_default + FROM information_schema.columns + WHERE table_schema = %s AND table_name = %s + ORDER BY ordinal_position; + """, + (schema, table), + ) + columns = cur.fetchall() + + for col in columns: + schema_data[f"{schema}.{table}"].append( + { + "column_name": col[0], + "data_type": col[1], + "is_nullable": col[2], + "column_default": col[3], + } + ) + + return schema_data + + except Exception: + return False diff --git 
a/quickstarts/08-data-agent-mcp-chainlit/requirements.txt b/quickstarts/08-data-agent-mcp-chainlit/requirements.txt new file mode 100644 index 0000000..bf06f2b --- /dev/null +++ b/quickstarts/08-data-agent-mcp-chainlit/requirements.txt @@ -0,0 +1,5 @@ +dapr-agents>=0.5.1 +python-dotenv +chainlit +psycopg +psycopg[binary] \ No newline at end of file diff --git a/quickstarts/08-data-agent-mcp-chainlit/schema.sql b/quickstarts/08-data-agent-mcp-chainlit/schema.sql new file mode 100644 index 0000000..4e0caec --- /dev/null +++ b/quickstarts/08-data-agent-mcp-chainlit/schema.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS users; + +CREATE TABLE users ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) NOT NULL, + email VARCHAR(255) UNIQUE NOT NULL, + is_customer BOOLEAN DEFAULT FALSE, + churn_reason TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); diff --git a/quickstarts/08-data-agent-mcp-chainlit/users.sql b/quickstarts/08-data-agent-mcp-chainlit/users.sql new file mode 100644 index 0000000..6687bb0 --- /dev/null +++ b/quickstarts/08-data-agent-mcp-chainlit/users.sql @@ -0,0 +1,11 @@ +INSERT INTO users (name, email, is_customer, churn_reason, created_at) VALUES +('Alice Johnson', 'alice@example.com', TRUE, NULL, NOW() - INTERVAL '6 months'), +('Bob Smith', 'bob@example.com', TRUE, NULL, NOW() - INTERVAL '5 months'), +('Carla Ruiz', 'carla@example.com', TRUE, NULL, NOW() - INTERVAL '4 months'), +('David Lee', 'david@example.com', TRUE, NULL, NOW() - INTERVAL '3 months'), +('Emma Chen', 'emma@example.com', TRUE, NULL, NOW() - INTERVAL '2 months'), +('Frank Novak', 'frank@example.com', FALSE, 'Spent 10 minutes trying to find the logout button — it was hidden in a weird place.', NOW() - INTERVAL '6 weeks'), +('Grace Patel', 'grace@example.com', FALSE, 'The dashboard felt cluttered and overwhelming right from the start.', NOW() - INTERVAL '5 weeks'), +('Hassan Ali', 'hassan@example.com', FALSE, 'Couldn’t figure out how to edit my profile — had to Google it.', NOW() - 
INTERVAL '4 weeks'), +('Isabella Moreau', 'isabella@example.com', FALSE, 'Forms had way too many fields and no clear labels.', NOW() - INTERVAL '3 weeks'), +('Jamal Wright', 'jamal@example.com', FALSE, 'Nothing looked clickable — I was stuck on the home screen for a while.', NOW() - INTERVAL '2 weeks');