# ruff: noqa: E501
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from __future__ import annotations
import argparse
import asyncio
import enum
import os
from typing import TYPE_CHECKING, Any, Literal
import openai
import pydantic
if TYPE_CHECKING:
from openai.types.chat import ChatCompletionChunk
# Names of the constraint kinds this example knows about; used as the key
# type of the PARAMS table defined further down in this file.
ConstraintsFormat = Literal["choice", "regex", "json", "grammar", "structural_tag"]
async def print_stream_response(
    stream_response: openai.AsyncStream[ChatCompletionChunk],
    title: str,
    args: argparse.Namespace,
) -> None:
    """Print a streamed chat completion to stdout as its chunks arrive.

    A title line is printed first. Each chunk's text is then written without
    a trailing newline so the output reads as one continuous line per
    section. A `` Reasoning: `` header precedes the first reasoning text and
    a `` Content: `` header precedes the first content text; each header is
    printed at most once.

    Args:
        stream_response: Async iterable of chat completion chunks. Only
            ``chunk.choices[0].delta`` is inspected.
        title: Label printed in the ``"<title> (Streaming):"`` heading.
        args: Parsed command-line options. Only ``args.reasoning`` is read;
            when it is falsy, reasoning text is not printed at all.
    """
    print(f"\n\n{title} (Streaming):")

    reasoning_header_printed = False
    content_header_printed = False

    async for chunk in stream_response:
        # A chunk may legitimately carry no choices (for example a trailing
        # usage-only chunk). Indexing choices[0] on it would raise
        # IndexError and abort the whole stream, so skip it instead.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta

        # `reasoning_content` is read defensively with getattr because the
        # delta object is not guaranteed to define it.
        reasoning_text: str | None = getattr(delta, "reasoning_content", None)
        content_text = delta.content

        if args.reasoning and reasoning_text:
            if not reasoning_header_printed:
                print(" Reasoning: ", end="")
                reasoning_header_printed = True
            print(reasoning_text, end="", flush=True)

        if content_text:
            if not content_header_printed:
                # Reasoning output ends without a newline, so break the line
                # before starting the content section. The flag can only be
                # set when args.reasoning is truthy, so this is a no-op in
                # content-only mode.
                if reasoning_header_printed:
                    print()
                print(" Content: ", end="")
                content_header_printed = True
            print(content_text, end="", flush=True)

    # Terminate the last streamed line.
    print()
# Closed set of body styles accepted for `CarDescription.car_type`.
# Mixing in `str` makes every member compare equal to (and serialize as) its
# plain string value.
# NOTE: documented with comments instead of a class docstring on purpose;
# pydantic is expected to copy a docstring into the generated JSON schema as
# a `description`, which would change the schema this example sends.
class CarType(str, enum.Enum):
    SEDAN = "SEDAN"
    SUV = "SUV"
    TRUCK = "TRUCK"
    COUPE = "COUPE"
# Shape of the JSON object requested in the "json" example; its
# `model_json_schema()` output is embedded in the PARAMS table below.
# NOTE: documented with comments instead of a class docstring on purpose;
# pydantic copies a model docstring into the generated schema as a
# `description`, which would change the request this example sends.
class CarDescription(pydantic.BaseModel):
    # Manufacturer name, free-form text.
    brand: str
    # Model name, free-form text.
    model: str
    # Body style, restricted to the members of `CarType`.
    car_type: CarType
PARAMS: dict[ConstraintsFormat, dict[str, Any]] = {
"choice": {
"messages": [
{
"role": "user",
"content": "Classify this sentiment: vLLM is wonderful!",
}
],
"extra_body": {"guided_choice": ["positive", "negative"]},
},
"regex": {
"messages": [
{
"role": "user",
"content": "Generate an email address for Alan Turing, who works in Enigma. End in .com and new line. Example result: 'alan.turing@enigma.com\n'",
}
],
"extra_body": {
"guided_regex": r"[a-z0-9.]{1,20}@\w{6,10}\.com\n",
},
},
"json": {
"messages": [
{
"role": "user",
"content": "Generate a JSON with the brand, model and car_type of the most iconic car from the 90's",
}
],
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "car-description",
"schema": CarDescription.model_json_schema(),
},
},
},
"grammar": {
"messages": [
{
"role": "user",
"content": "Generate an SQL query to show the 'username' and 'email'from the 'users' table.",
}
],
"extra_body": {
"guided_grammar": """
root ::= select_statement
select_statement ::= "SELECT " column " from " table " where " condition
column ::= "col_1 " | "col_2 "
table ::= "table_1 " | "table_2 "
condition ::= column "= " number
number ::= "1 " | "2 "
""",
},
},
"structural_tag": {
"messages": [
{
"role": "user",
"content": """
You have access to the following function to retrieve the weather in a city:
{
"name": "get_weather",
"parameters": {
"city": {
"param_type": "string",
"description": "The city to get the weather for",
"required": True
}
}
}
If a you choose to call a function ONLY reply in the following format:
<{start_tag}={function_name}>{parameters}{end_tag}
where
start_tag => ` a JSON dict with the function argument name as key and function
argument value as value.
end_tag => ``
Here is an example,
{"example_name": "example_value"}
Reminder:
- Function calls MUST follow the specified format
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
- Always add your sources when using search results to answer the user query
You are a helpful assistant.
Given the previous instructions, what is the weather in New York City, Boston,
and San Francisco?""",
},
],
"response_format": {
"type": "structural_tag",
"structures": [
{
"begin": "",
"schema": {
"type": "object",
"properties": {"city": {"type": "string"}},
"required": ["city"],
},
"end": "",
}
],
"triggers": ["