From 88d46d46fd6bf7b3a26522114548bf22fa5812fa Mon Sep 17 00:00:00 2001 From: Heidar <74025356+Heidar-An@users.noreply.github.com> Date: Thu, 14 May 2026 17:32:54 +0100 Subject: [PATCH] fix: omit null delta fields in streaming chat completions (issue #2082) (#2092) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation Streaming /v1/chat/completions responses emitted null for tool_calls, function_call, name, and tool_call_id in every delta chunk. The OpenAI streaming spec marks these fields as non-nullable — they must either carry a real value or be absent entirely. Spec-correct clients doing delta.get("tool_calls", []) receive None and crash with 'NoneType' object is not iterable. Root cause: the streaming serialisation path called model_dump_json() without exclude_none=True, while the request-parsing path already used it correctly. Four call sites in chat_completions.py (three streaming, plus the non-streaming collect_chat_response) and two in responses.py were affected. ## Testing Before — every delta carries explicit nulls: $ curl -sN -X POST http://localhost:52415/v1/chat/completions \ -H 'Content-Type: application/json' \ -d '{"model":"mlx-community/Qwen3.5-2B-MLX-8bit","messages":[{"role":"user"," content":"hi"}],"max_tokens":3,"stream":true}' \ | grep "^data: " data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","c ontent":null,"reasoning_content":"Okay","name":null,"tool_calls":null,"tool_cal l_id":null,"function_call":null},"logprobs":null,"finish_reason":null,"usage":n ull}],"usage":null,"service_tier":null} data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","c ontent":null,"reasoning_content":",","name":null,"tool_calls":null,"tool_call_i d":null,"function_call":null},"logprobs":null,"finish_reason":null,"usage":null }],"usage":null,"service_tier":null} data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","c ontent":" the","reasoning_content":null,"name":null,"tool_calls":null,"tool_cal 
l_id":null,"function_call":null},"logprobs":null,"finish_reason":"length","usag e":{"prompt_tokens":11,...}}],"usage":null,"service_tier":null} data: [DONE] After — only populated fields are emitted: data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-commun ity/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","rea soning_content":"Okay"}}]} data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-commun ity/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","rea soning_content":","}}]} data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-commun ity/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","con tent":" the"},"finish_reason":"length"}],"usage":{"prompt_tokens":11,"completio n_tokens":3,"total_tokens":14,...}} data: [DONE] --- src/exo/api/adapters/chat_completions.py | 8 +- src/exo/api/adapters/responses.py | 4 +- .../api/tests/test_chat_completions_stream.py | 195 ++++++++++++++++++ 3 files changed, 201 insertions(+), 6 deletions(-) create mode 100644 src/exo/api/tests/test_chat_completions_stream.py diff --git a/src/exo/api/adapters/chat_completions.py b/src/exo/api/adapters/chat_completions.py index d10cfb618..cbd545318 100644 --- a/src/exo/api/adapters/chat_completions.py +++ b/src/exo/api/adapters/chat_completions.py @@ -238,7 +238,7 @@ async def generate_chat_stream( code=500, ) ) - yield f"data: {error_response.model_dump_json()}\n\n" + yield f"data: {error_response.model_dump_json(exclude_none=True)}\n\n" yield "data: [DONE]\n\n" return @@ -269,7 +269,7 @@ async def generate_chat_stream( ], usage=last_usage, ) - yield f"data: {tool_response.model_dump_json()}\n\n" + yield f"data: {tool_response.model_dump_json(exclude_none=True)}\n\n" if chunk.stats is not None: yield f": generation_stats {chunk.stats.model_dump_json()}\n\n" yield "data: [DONE]\n\n" @@ -283,7 +283,7 @@ async def generate_chat_stream( chunk_response = 
chunk_response.model_copy( update={"usage": last_usage} ) - yield f"data: {chunk_response.model_dump_json()}\n\n" + yield f"data: {chunk_response.model_dump_json(exclude_none=True)}\n\n" if chunk.finish_reason is not None: if chunk.stats is not None: @@ -379,5 +379,5 @@ async def collect_chat_response( ) ], usage=last_usage, - ).model_dump_json() + ).model_dump_json(exclude_none=True) return diff --git a/src/exo/api/adapters/responses.py b/src/exo/api/adapters/responses.py index d65db32d5..41ceab1ad 100644 --- a/src/exo/api/adapters/responses.py +++ b/src/exo/api/adapters/responses.py @@ -101,7 +101,7 @@ def _build_response_usage(usage: Usage) -> ResponseUsage: def _format_sse(event: ResponsesStreamEvent) -> str: """Format a streaming event as an SSE message.""" - return f"event: {event.type}\ndata: {event.model_dump_json()}\n\n" + return f"event: {event.type}\ndata: {event.model_dump_json(exclude_none=True)}\n\n" def _extract_content(content: str | list[ResponseContentPart]) -> str: @@ -471,7 +471,7 @@ async def collect_responses_response( output=output, output_text=accumulated_text, usage=usage, - ).model_dump_json() + ).model_dump_json(exclude_none=True) return diff --git a/src/exo/api/tests/test_chat_completions_stream.py b/src/exo/api/tests/test_chat_completions_stream.py new file mode 100644 index 000000000..2f718f167 --- /dev/null +++ b/src/exo/api/tests/test_chat_completions_stream.py @@ -0,0 +1,195 @@ +# pyright: reportAny=false +"""Tests asserting OpenAI-spec wire shape for /v1/chat/completions deltas.""" + +import json +from collections.abc import AsyncGenerator +from typing import Any + +from exo.api.adapters.chat_completions import ( + collect_chat_response, + generate_chat_stream, +) +from exo.api.types import ( + CompletionTokensDetails, + PromptTokensDetails, + ToolCallItem, + Usage, +) +from exo.shared.types.chunks import ( + ErrorChunk, + PrefillProgressChunk, + TokenChunk, + ToolCallChunk, +) +from exo.shared.types.common import CommandId, 
ModelId + +_TEST_MODEL = ModelId("test-model") +_NULLABLE_DELTA_FIELDS = {"content", "refusal"} + + +def _make_usage(prompt_tokens: int = 1, completion_tokens: int = 1) -> Usage: + return Usage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + prompt_tokens_details=PromptTokensDetails(), + completion_tokens_details=CompletionTokensDetails(), + ) + + +async def _stream( + chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk], +) -> AsyncGenerator[ + PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk, None +]: + for chunk in chunks: + yield chunk + + +def _parse_data_events(lines: list[str]) -> list[dict[str, Any]]: + events: list[dict[str, Any]] = [] + for line in lines: + for sub in line.split("\n"): + if sub.startswith("data: ") and not sub.endswith("[DONE]"): + events.append(json.loads(sub[len("data: ") :])) + return events + + +def _assert_delta_spec_compliant(delta: dict[str, Any]) -> None: + """Reject any null delta key the OpenAI spec doesn't allow to be null.""" + for key, value in delta.items(): + if value is None and key not in _NULLABLE_DELTA_FIELDS: + raise AssertionError( + f"delta.{key} is null but spec requires it to be absent or a value; " + f"full delta={delta!r}" + ) + + +class TestTokenStreamDeltaShape: + async def test_token_chunk_delta_has_no_disallowed_nulls(self): + chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [ + TokenChunk( + model=_TEST_MODEL, + token_id=1, + text="Hello", + usage=None, + ), + TokenChunk( + model=_TEST_MODEL, + token_id=2, + text=" world", + usage=_make_usage(), + finish_reason="stop", + ), + ] + lines: list[str] = [] + async for event in generate_chat_stream( + CommandId("test-cmd-token"), _stream(chunks) + ): + lines.append(event) + + events = _parse_data_events(lines) + assert len(events) == 2 + for event in events: + delta = event["choices"][0]["delta"] + 
_assert_delta_spec_compliant(delta) + assert "tool_calls" not in delta or isinstance(delta["tool_calls"], list) + assert "function_call" not in delta + assert "name" not in delta + assert "tool_call_id" not in delta + + async def test_thinking_chunk_delta_has_no_disallowed_nulls(self): + chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [ + TokenChunk( + model=_TEST_MODEL, + token_id=1, + text="Hmm", + usage=None, + is_thinking=True, + ), + ] + lines: list[str] = [] + async for event in generate_chat_stream( + CommandId("test-cmd-thinking"), _stream(chunks) + ): + lines.append(event) + + events = _parse_data_events(lines) + assert len(events) == 1 + delta = events[0]["choices"][0]["delta"] + _assert_delta_spec_compliant(delta) + assert delta.get("reasoning_content") == "Hmm" + assert "content" not in delta + + +class TestToolCallStreamDeltaShape: + async def test_tool_call_chunk_delta_has_array_tool_calls(self): + chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [ + ToolCallChunk( + model=_TEST_MODEL, + tool_calls=[ + ToolCallItem(id="call_1", name="get_weather", arguments="{}"), + ], + usage=_make_usage(), + ), + ] + lines: list[str] = [] + async for event in generate_chat_stream( + CommandId("test-cmd-tool"), _stream(chunks) + ): + lines.append(event) + + events = _parse_data_events(lines) + assert len(events) == 1 + delta = events[0]["choices"][0]["delta"] + _assert_delta_spec_compliant(delta) + assert isinstance(delta["tool_calls"], list) + assert delta["tool_calls"][0]["function"]["name"] == "get_weather" + + +class TestErrorStreamShape: + async def test_error_chunk_response_has_no_nulls(self): + chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [ + ErrorChunk(model=_TEST_MODEL, error_message="boom"), + ] + lines: list[str] = [] + async for event in generate_chat_stream( + CommandId("test-cmd-err"), _stream(chunks) + ): + lines.append(event) + + events = 
_parse_data_events(lines) + assert len(events) == 1 + assert events[0]["error"]["message"] == "boom" + for value in events[0]["error"].values(): + assert value is not None + + +class TestNonStreamingResponseShape: + async def test_collected_response_message_has_no_disallowed_nulls(self): + chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [ + TokenChunk( + model=_TEST_MODEL, + token_id=1, + text="Hello", + usage=_make_usage(), + finish_reason="stop", + ), + ] + parts: list[str] = [] + async for part in collect_chat_response( + CommandId("test-cmd-nonstream"), _stream(chunks) + ): + parts.append(part) + + assert len(parts) == 1 + payload = json.loads(parts[0]) + message = payload["choices"][0]["message"] + for key, value in message.items(): + if value is None: + assert key in {"content", "refusal", "reasoning_content"}, ( + f"non-streaming message.{key} is null but spec disallows it" + ) + assert "function_call" not in message + assert "name" not in message + assert "tool_call_id" not in message