diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index ddb38e304c..c7229dbb8e 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -404,8 +404,14 @@ async def async_request_openai_chat_completions( chunk_bytes = chunk_bytes.strip() if not chunk_bytes: continue + chunk_bytes = chunk_bytes.decode("utf-8") + # NOTE: SSE comments (often used as pings) start with a colon. + # These are not JSON data payload and should be skipped. + if chunk_bytes.startswith(":"): + continue + + chunk = chunk_bytes.removeprefix("data: ") - chunk = chunk_bytes.decode("utf-8").removeprefix("data: ") if chunk != "[DONE]": timestamp = time.perf_counter() data = json.loads(chunk) diff --git a/vllm/benchmarks/endpoint_request_func.py b/vllm/benchmarks/endpoint_request_func.py index aba60edc58..60ae520db3 100644 --- a/vllm/benchmarks/endpoint_request_func.py +++ b/vllm/benchmarks/endpoint_request_func.py @@ -104,9 +104,15 @@ async def async_request_openai_completions( chunk_bytes = chunk_bytes.strip() if not chunk_bytes: continue + chunk_bytes = chunk_bytes.decode("utf-8") + # NOTE: SSE comments (often used as pings) start with + # a colon. These are not JSON data payload and should + # be skipped. + if chunk_bytes.startswith(":"): + continue + + chunk = chunk_bytes.removeprefix("data: ") - chunk = chunk_bytes.decode("utf-8").removeprefix( - "data: ") if chunk != "[DONE]": data = json.loads(chunk) @@ -213,9 +219,15 @@ async def async_request_openai_chat_completions( chunk_bytes = chunk_bytes.strip() if not chunk_bytes: continue + chunk_bytes = chunk_bytes.decode("utf-8") + # NOTE: SSE comments (often used as pings) start with + # a colon. These are not JSON data payload and should + # be skipped. + if chunk_bytes.startswith(":"): + continue + + chunk = chunk_bytes.removeprefix("data: ") - chunk = chunk_bytes.decode("utf-8").removeprefix( - "data: ") if chunk != "[DONE]": timestamp = time.perf_counter() data = json.loads(chunk)