From 202c5df9357e7c52b51e19abc70e8444f3f85ada Mon Sep 17 00:00:00 2001 From: "Wang, Yi" Date: Sun, 22 Jun 2025 15:21:04 +0800 Subject: [PATCH] [Benchmark] fix request loss if "ping" is returned (#19535) Signed-off-by: Wang, Yi A Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- benchmarks/backend_request_func.py | 8 +++++++- vllm/benchmarks/endpoint_request_func.py | 20 ++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index ddb38e304c..c7229dbb8e 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -404,8 +404,14 @@ async def async_request_openai_chat_completions( chunk_bytes = chunk_bytes.strip() if not chunk_bytes: continue + chunk_bytes = chunk_bytes.decode("utf-8") + # NOTE: SSE comments (often used as pings) start with a colon. + # These are not JSON data payload and should be skipped. + if chunk_bytes.startswith(":"): + continue + + chunk = chunk_bytes.removeprefix("data: ") - chunk = chunk_bytes.decode("utf-8").removeprefix("data: ") if chunk != "[DONE]": timestamp = time.perf_counter() data = json.loads(chunk) diff --git a/vllm/benchmarks/endpoint_request_func.py b/vllm/benchmarks/endpoint_request_func.py index aba60edc58..60ae520db3 100644 --- a/vllm/benchmarks/endpoint_request_func.py +++ b/vllm/benchmarks/endpoint_request_func.py @@ -104,9 +104,15 @@ async def async_request_openai_completions( chunk_bytes = chunk_bytes.strip() if not chunk_bytes: continue + chunk_bytes = chunk_bytes.decode("utf-8") + # NOTE: SSE comments (often used as pings) start with + # a colon. These are not JSON data payload and should + # be skipped. + if chunk_bytes.startswith(":"): + continue + + chunk = chunk_bytes.removeprefix("data: ") - chunk = chunk_bytes.decode("utf-8").removeprefix( - "data: ") if chunk != "[DONE]": data = json.loads(chunk) @@ -213,9 +219,15 @@ async def async_request_openai_chat_completions( chunk_bytes = chunk_bytes.strip() if not chunk_bytes: continue + chunk_bytes = chunk_bytes.decode("utf-8") + # NOTE: SSE comments (often used as pings) start with + # a colon. These are not JSON data payload and should + # be skipped. + if chunk_bytes.startswith(":"): + continue + + chunk = chunk_bytes.removeprefix("data: ") - chunk = chunk_bytes.decode("utf-8").removeprefix( - "data: ") if chunk != "[DONE]": timestamp = time.perf_counter() data = json.loads(chunk)