mirror of https://github.com/vllm-project/vllm.git
[Benchmark] fix request loss if "ping" is returned (#19535)
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
2bb246b8f7
commit
202c5df935
|
@@ -404,8 +404,14 @@ async def async_request_openai_chat_completions(
|
||||||
chunk_bytes = chunk_bytes.strip()
|
chunk_bytes = chunk_bytes.strip()
|
||||||
if not chunk_bytes:
|
if not chunk_bytes:
|
||||||
continue
|
continue
|
||||||
|
chunk_bytes = chunk_bytes.decode("utf-8")
|
||||||
|
# NOTE: SSE comments (often used as pings) start with a colon.
|
||||||
|
# These are not JSON data payload and should be skipped.
|
||||||
|
if chunk_bytes.startswith(":"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
chunk = chunk_bytes.removeprefix("data: ")
|
||||||
|
|
||||||
chunk = chunk_bytes.decode("utf-8").removeprefix("data: ")
|
|
||||||
if chunk != "[DONE]":
|
if chunk != "[DONE]":
|
||||||
timestamp = time.perf_counter()
|
timestamp = time.perf_counter()
|
||||||
data = json.loads(chunk)
|
data = json.loads(chunk)
|
||||||
|
|
|
@@ -104,9 +104,15 @@ async def async_request_openai_completions(
|
||||||
chunk_bytes = chunk_bytes.strip()
|
chunk_bytes = chunk_bytes.strip()
|
||||||
if not chunk_bytes:
|
if not chunk_bytes:
|
||||||
continue
|
continue
|
||||||
|
chunk_bytes = chunk_bytes.decode("utf-8")
|
||||||
|
# NOTE: SSE comments (often used as pings) start with
|
||||||
|
# a colon. These are not JSON data payload and should
|
||||||
|
# be skipped.
|
||||||
|
if chunk_bytes.startswith(":"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
chunk = chunk_bytes.removeprefix("data: ")
|
||||||
|
|
||||||
chunk = chunk_bytes.decode("utf-8").removeprefix(
|
|
||||||
"data: ")
|
|
||||||
if chunk != "[DONE]":
|
if chunk != "[DONE]":
|
||||||
data = json.loads(chunk)
|
data = json.loads(chunk)
|
||||||
|
|
||||||
|
@@ -213,9 +219,15 @@ async def async_request_openai_chat_completions(
|
||||||
chunk_bytes = chunk_bytes.strip()
|
chunk_bytes = chunk_bytes.strip()
|
||||||
if not chunk_bytes:
|
if not chunk_bytes:
|
||||||
continue
|
continue
|
||||||
|
chunk_bytes = chunk_bytes.decode("utf-8")
|
||||||
|
# NOTE: SSE comments (often used as pings) start with
|
||||||
|
# a colon. These are not JSON data payload and should
|
||||||
|
# be skipped.
|
||||||
|
if chunk_bytes.startswith(":"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
chunk = chunk_bytes.removeprefix("data: ")
|
||||||
|
|
||||||
chunk = chunk_bytes.decode("utf-8").removeprefix(
|
|
||||||
"data: ")
|
|
||||||
if chunk != "[DONE]":
|
if chunk != "[DONE]":
|
||||||
timestamp = time.perf_counter()
|
timestamp = time.perf_counter()
|
||||||
data = json.loads(chunk)
|
data = json.loads(chunk)
|
||||||
|
|
Loading…
Reference in New Issue