# ruff: noqa: E501 # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from __future__ import annotations import argparse import asyncio import enum import os from typing import TYPE_CHECKING, Any, Literal import openai import pydantic if TYPE_CHECKING: from openai.types.chat import ChatCompletionChunk ConstraintsFormat = Literal[ "choice", "regex", "json", "grammar", "structural_tag", ] async def print_stream_response( stream_response: openai.AsyncStream[ChatCompletionChunk], title: str, args: argparse.Namespace, ): print(f"\n\n{title} (Streaming):") local_reasoning_header_printed = False local_content_header_printed = False async for chunk in stream_response: delta = chunk.choices[0].delta reasoning_chunk_text: str | None = getattr(delta, "reasoning_content", None) content_chunk_text = delta.content if args.reasoning: if reasoning_chunk_text: if not local_reasoning_header_printed: print(" Reasoning: ", end="") local_reasoning_header_printed = True print(reasoning_chunk_text, end="", flush=True) if content_chunk_text: if not local_content_header_printed: if local_reasoning_header_printed: print() print(" Content: ", end="") local_content_header_printed = True print(content_chunk_text, end="", flush=True) else: if content_chunk_text: if not local_content_header_printed: print(" Content: ", end="") local_content_header_printed = True print(content_chunk_text, end="", flush=True) print() class CarType(str, enum.Enum): SEDAN = "SEDAN" SUV = "SUV" TRUCK = "TRUCK" COUPE = "COUPE" class CarDescription(pydantic.BaseModel): brand: str model: str car_type: CarType PARAMS: dict[ConstraintsFormat, dict[str, Any]] = { "choice": { "messages": [ { "role": "user", "content": "Classify this sentiment: vLLM is wonderful!", } ], "extra_body": {"guided_choice": ["positive", "negative"]}, }, "regex": { "messages": [ { "role": "user", "content": "Generate an email address for Alan Turing, who works in Enigma. End in .com and new line. Example result: 'alan.turing@enigma.com\n'", } ], "extra_body": { "guided_regex": r"[a-z0-9.]{1,20}@\w{6,10}\.com\n", }, }, "json": { "messages": [ { "role": "user", "content": "Generate a JSON with the brand, model and car_type of the most iconic car from the 90's", } ], "response_format": { "type": "json_schema", "json_schema": { "name": "car-description", "schema": CarDescription.model_json_schema(), }, }, }, "grammar": { "messages": [ { "role": "user", "content": "Generate an SQL query to show the 'username' and 'email'from the 'users' table.", } ], "extra_body": { "guided_grammar": """ root ::= select_statement select_statement ::= "SELECT " column " from " table " where " condition column ::= "col_1 " | "col_2 " table ::= "table_1 " | "table_2 " condition ::= column "= " number number ::= "1 " | "2 " """, }, }, "structural_tag": { "messages": [ { "role": "user", "content": """ You have access to the following function to retrieve the weather in a city: { "name": "get_weather", "parameters": { "city": { "param_type": "string", "description": "The city to get the weather for", "required": True } } } If a you choose to call a function ONLY reply in the following format: <{start_tag}={function_name}>{parameters}{end_tag} where start_tag => ` a JSON dict with the function argument name as key and function argument value as value. end_tag => `` Here is an example, {"example_name": "example_value"} Reminder: - Function calls MUST follow the specified format - Required parameters MUST be specified - Only call one function at a time - Put the entire function call reply on one line - Always add your sources when using search results to answer the user query You are a helpful assistant. Given the previous instructions, what is the weather in New York City, Boston, and San Francisco?""", }, ], "response_format": { "type": "structural_tag", "structures": [ { "begin": "", "schema": { "type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"], }, "end": "", } ], "triggers": ["