fix: omit null delta fields in streaming chat completions (issue #2082) (#2092)

## Motivation

Streaming /v1/chat/completions responses emitted null for tool_calls, function_call, name, and tool_call_id in every delta chunk. The OpenAI streaming spec marks these fields as non-nullable — they must either carry a real value or be absent entirely. Spec-correct clients doing delta.get("tool_calls", []) receive None and crash with "'NoneType' object is not iterable".

Root cause: the streaming serialisation path called model_dump_json() without exclude_none=True, while the request-parsing path already used it correctly. Three call sites in chat_completions.py and two in responses.py were affected.

## Testing

Before — every delta carries explicit nulls:

$ curl -sN -X POST http://localhost:52415/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"model":"mlx-community/Qwen3.5-2B-MLX-8bit","messages":[{"role":"user","content":"hi"}],"max_tokens":3,"stream":true}' \
    | grep "^data: "
data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":"Okay","name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":null,"usage":null}],"usage":null,"service_tier":null}
data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":",","name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":null,"usage":null}],"usage":null,"service_tier":null}
data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":" the","reasoning_content":null,"name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":"length","usage":{"prompt_tokens":11,...}}],"usage":null,"service_tier":null}
data: [DONE]

After — only populated fields are emitted:

data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"Okay"}}]}
data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":","}}]}
data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","content":" the"},"finish_reason":"length"}],"usage":{"prompt_tokens":11,"completion_tokens":3,"total_tokens":14,...}}
data: [DONE]
2026-05-19 04:05:23 -04:00 · 2026-05-14 17:32:54 +01:00
parent e8ec8d5010
commit 88d46d46fd
3 changed files with 201 additions and 6 deletions
--- a/src/exo/api/adapters/chat_completions.py
+++ b/src/exo/api/adapters/chat_completions.py
@@ -238,7 +238,7 @@ async def generate_chat_stream(
                        code=500,
                    )
                )
-                yield f"data: {error_response.model_dump_json()}\n\n"
+                yield f"data: {error_response.model_dump_json(exclude_none=True)}\n\n"
                yield "data: [DONE]\n\n"
                return

@@ -269,7 +269,7 @@ async def generate_chat_stream(
                    ],
                    usage=last_usage,
                )
-                yield f"data: {tool_response.model_dump_json()}\n\n"
+                yield f"data: {tool_response.model_dump_json(exclude_none=True)}\n\n"
                if chunk.stats is not None:
                    yield f": generation_stats {chunk.stats.model_dump_json()}\n\n"
                yield "data: [DONE]\n\n"
@@ -283,7 +283,7 @@ async def generate_chat_stream(
                    chunk_response = chunk_response.model_copy(
                        update={"usage": last_usage}
                    )
-                yield f"data: {chunk_response.model_dump_json()}\n\n"
+                yield f"data: {chunk_response.model_dump_json(exclude_none=True)}\n\n"

                if chunk.finish_reason is not None:
                    if chunk.stats is not None:
@@ -379,5 +379,5 @@ async def collect_chat_response(
            )
        ],
        usage=last_usage,
-    ).model_dump_json()
+    ).model_dump_json(exclude_none=True)
    return
--- a/src/exo/api/adapters/responses.py
+++ b/src/exo/api/adapters/responses.py
@@ -101,7 +101,7 @@ def _build_response_usage(usage: Usage) -> ResponseUsage:

 def _format_sse(event: ResponsesStreamEvent) -> str:
    """Format a streaming event as an SSE message."""
-    return f"event: {event.type}\ndata: {event.model_dump_json()}\n\n"
+    return f"event: {event.type}\ndata: {event.model_dump_json(exclude_none=True)}\n\n"


 def _extract_content(content: str | list[ResponseContentPart]) -> str:
@@ -471,7 +471,7 @@ async def collect_responses_response(
        output=output,
        output_text=accumulated_text,
        usage=usage,
-    ).model_dump_json()
+    ).model_dump_json(exclude_none=True)
    return


--- a/src/exo/api/tests/test_chat_completions_stream.py
+++ b/src/exo/api/tests/test_chat_completions_stream.py
@@ -0,0 +1,195 @@
+# pyright: reportAny=false
+"""Tests asserting OpenAI-spec wire shape for /v1/chat/completions deltas."""
+
+import json
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from exo.api.adapters.chat_completions import (
+    collect_chat_response,
+    generate_chat_stream,
+)
+from exo.api.types import (
+    CompletionTokensDetails,
+    PromptTokensDetails,
+    ToolCallItem,
+    Usage,
+)
+from exo.shared.types.chunks import (
+    ErrorChunk,
+    PrefillProgressChunk,
+    TokenChunk,
+    ToolCallChunk,
+)
+from exo.shared.types.common import CommandId, ModelId
+
+_TEST_MODEL = ModelId("test-model")
+_NULLABLE_DELTA_FIELDS = {"content", "refusal"}
+
+
+def _make_usage(prompt_tokens: int = 1, completion_tokens: int = 1) -> Usage:
+    return Usage(
+        prompt_tokens=prompt_tokens,
+        completion_tokens=completion_tokens,
+        total_tokens=prompt_tokens + completion_tokens,
+        prompt_tokens_details=PromptTokensDetails(),
+        completion_tokens_details=CompletionTokensDetails(),
+    )
+
+
+async def _stream(
+    chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk],
+) -> AsyncGenerator[
+    PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk, None
+]:
+    for chunk in chunks:
+        yield chunk
+
+
+def _parse_data_events(lines: list[str]) -> list[dict[str, Any]]:
+    events: list[dict[str, Any]] = []
+    for line in lines:
+        for sub in line.split("\n"):
+            if sub.startswith("data: ") and not sub.endswith("[DONE]"):
+                events.append(json.loads(sub[len("data: ") :]))
+    return events
+
+
+def _assert_delta_spec_compliant(delta: dict[str, Any]) -> None:
+    """Reject any null delta key the OpenAI spec doesn't allow to be null."""
+    for key, value in delta.items():
+        if value is None and key not in _NULLABLE_DELTA_FIELDS:
+            raise AssertionError(
+                f"delta.{key} is null but spec requires it to be absent or a value; "
+                f"full delta={delta!r}"
+            )
+
+
+class TestTokenStreamDeltaShape:
+    async def test_token_chunk_delta_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hello",
+                usage=None,
+            ),
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=2,
+                text=" world",
+                usage=_make_usage(),
+                finish_reason="stop",
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-token"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 2
+        for event in events:
+            delta = event["choices"][0]["delta"]
+            _assert_delta_spec_compliant(delta)
+            assert "tool_calls" not in delta or isinstance(delta["tool_calls"], list)
+            assert "function_call" not in delta
+            assert "name" not in delta
+            assert "tool_call_id" not in delta
+
+    async def test_thinking_chunk_delta_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hmm",
+                usage=None,
+                is_thinking=True,
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-thinking"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        delta = events[0]["choices"][0]["delta"]
+        _assert_delta_spec_compliant(delta)
+        assert delta.get("reasoning_content") == "Hmm"
+        assert "content" not in delta
+
+
+class TestToolCallStreamDeltaShape:
+    async def test_tool_call_chunk_delta_has_array_tool_calls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            ToolCallChunk(
+                model=_TEST_MODEL,
+                tool_calls=[
+                    ToolCallItem(id="call_1", name="get_weather", arguments="{}"),
+                ],
+                usage=_make_usage(),
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-tool"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        delta = events[0]["choices"][0]["delta"]
+        _assert_delta_spec_compliant(delta)
+        assert isinstance(delta["tool_calls"], list)
+        assert delta["tool_calls"][0]["function"]["name"] == "get_weather"
+
+
+class TestErrorStreamShape:
+    async def test_error_chunk_response_has_no_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            ErrorChunk(model=_TEST_MODEL, error_message="boom"),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-err"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        assert events[0]["error"]["message"] == "boom"
+        for value in events[0]["error"].values():
+            assert value is not None
+
+
+class TestNonStreamingResponseShape:
+    async def test_collected_response_message_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hello",
+                usage=_make_usage(),
+                finish_reason="stop",
+            ),
+        ]
+        parts: list[str] = []
+        async for part in collect_chat_response(
+            CommandId("test-cmd-nonstream"), _stream(chunks)
+        ):
+            parts.append(part)
+
+        assert len(parts) == 1
+        payload = json.loads(parts[0])
+        message = payload["choices"][0]["message"]
+        for key, value in message.items():
+            if value is None:
+                assert key in {"content", "refusal", "reasoning_content"}, (
+                    f"non-streaming message.{key} is null but spec disallows it"
+                )
+        assert "function_call" not in message
+        assert "name" not in message
+        assert "tool_call_id" not in message