# vllm/examples/online_serving/openai_translation_client.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import json

import httpx
from openai import OpenAI

from vllm.assets.audio import AudioAsset


def sync_openai(audio_path: str, client: OpenAI):
    """Translate an audio file with one blocking request and print the text.

    Args:
        audio_path: Path of the audio file to upload.
        client: OpenAI client used to issue the translation request.
    """
    # Request fields that are not part of the OpenAI API; they are sent
    # along in the request body via ``extra_body``.
    extra_params = {
        "language": "it",
        "seed": 4419,
        "repetition_penalty": 1.3,
    }
    with open(audio_path, "rb") as audio_file:
        result = client.audio.translations.create(
            model="openai/whisper-large-v3",
            file=audio_file,
            temperature=0.0,
            response_format="json",
            extra_body=extra_params,
        )
    print("translation result:", result.text)


async def stream_openai_response(audio_path: str, base_url: str, api_key: str):
    """Stream a translation from the server and print it as it arrives.

    The OpenAI translation API client does not support streaming, so the
    multipart request is sent with ``httpx`` and the ``data: ``-prefixed
    response lines are parsed by hand.

    Args:
        audio_path: Path of the audio file to upload.
        base_url: API base URL; ``/audio/translations`` is appended to it.
        api_key: Token sent in the ``Authorization: Bearer`` header.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    data = {
        "language": "it",
        "stream": True,
        "model": "openai/whisper-large-v3",
    }
    url = base_url + "/audio/translations"
    headers = {"Authorization": f"Bearer {api_key}"}
    # Flush so the prefix is visible before the first chunk arrives.
    print("translation result:", end=" ", flush=True)
    # OpenAI translation API client does not support streaming.
    async with httpx.AsyncClient() as client:
        with open(audio_path, "rb") as f:
            async with client.stream(
                "POST", url, files={"file": f}, data=data, headers=headers
            ) as response:
                # Fail with a clear HTTP error instead of trying to parse an
                # error body as if it were a stream chunk.
                response.raise_for_status()
                async for line in response.aiter_lines():
                    # Blank lines only separate events; nothing to parse.
                    if not line:
                        continue
                    # Each line is a JSON object prefixed with 'data: '
                    if line.startswith("data: "):
                        line = line[len("data: ") :]
                    # Last chunk, stream ends
                    if line.strip() == "[DONE]":
                        break
                    chunk = json.loads(line)
                    choices = chunk["choices"]
                    # NOTE(review): presumably some chunks (e.g. usage-only
                    # ones) carry an empty ``choices`` list; skip them rather
                    # than raising IndexError.
                    if not choices:
                        continue
                    content = choices[0].get("delta", {}).get("content")
                    # A delta without content would otherwise be printed as
                    # the literal string "None".
                    if content:
                        # Flush each piece so the text appears incrementally.
                        print(content, end="", flush=True)
    # Terminate the line that the streamed pieces were written on.
    print()


def main():
    """Run the blocking example, then the streaming one, on one audio asset."""
    # Point the OpenAI client at vLLM's API server rather than OpenAI's.
    api_key = "EMPTY"
    base_url = "http://localhost:8000/v1"

    # Local path of the `azacinto_foscolo` audio asset used by both examples.
    audio_path = str(AudioAsset("azacinto_foscolo").get_local_path())

    sync_openai(audio_path, OpenAI(base_url=base_url, api_key=api_key))
    # The streaming variant is a coroutine, so drive it with an event loop.
    asyncio.run(stream_openai_response(audio_path, base_url, api_key))


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()