From 88d46d46fd6bf7b3a26522114548bf22fa5812fa Mon Sep 17 00:00:00 2001
From: Heidar <74025356+Heidar-An@users.noreply.github.com>
Date: Thu, 14 May 2026 17:32:54 +0100
Subject: [PATCH] fix: omit null delta fields in streaming chat completions
 (issue #2082) (#2092)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Motivation

Streaming /v1/chat/completions responses emitted null for tool_calls,
function_call, name, and tool_call_id in every delta chunk. The OpenAI
streaming spec marks these fields as non-nullable — they must either
carry a real value or be absent entirely. Spec-correct clients doing
delta.get("tool_calls", []) receive None and crash with
TypeError: 'NoneType' object is not iterable.

Root cause: the response serialisation paths called model_dump_json()
without exclude_none=True, while the request-parsing path already used
it correctly. Four call sites in chat_completions.py (three in
generate_chat_stream, one in collect_chat_response) and two in
responses.py (_format_sse and collect_responses_response) were affected.

## Testing

Before — every delta carries explicit nulls:

  $ curl -sN -X POST http://localhost:52415/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"model":"mlx-community/Qwen3.5-2B-MLX-8bit","messages":[{"role":"user","content":"hi"}],"max_tokens":3,"stream":true}' \
    | grep "^data: "
  data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":"Okay","name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":null,"usage":null}],"usage":null,"service_tier":null}
  data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":",","name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":null,"usage":null}],"usage":null,"service_tier":null}
  data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":" the","reasoning_content":null,"name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":"length","usage":{"prompt_tokens":11,...}}],"usage":null,"service_tier":null}
  data: [DONE]

After — only populated fields are emitted:

  data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"Okay"}}]}
  data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":","}}]}
  data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","content":" the"},"finish_reason":"length"}],"usage":{"prompt_tokens":11,"completion_tokens":3,"total_tokens":14,...}}
  data: [DONE]
---
 src/exo/api/adapters/chat_completions.py      |   8 +-
 src/exo/api/adapters/responses.py             |   4 +-
 .../api/tests/test_chat_completions_stream.py | 195 ++++++++++++++++++
 3 files changed, 201 insertions(+), 6 deletions(-)
 create mode 100644 src/exo/api/tests/test_chat_completions_stream.py

diff --git a/src/exo/api/adapters/chat_completions.py b/src/exo/api/adapters/chat_completions.py
index d10cfb618..cbd545318 100644
--- a/src/exo/api/adapters/chat_completions.py
+++ b/src/exo/api/adapters/chat_completions.py
@@ -238,7 +238,7 @@ async def generate_chat_stream(
                         code=500,
                     )
                 )
-                yield f"data: {error_response.model_dump_json()}\n\n"
+                yield f"data: {error_response.model_dump_json(exclude_none=True)}\n\n"
                 yield "data: [DONE]\n\n"
                 return
 
@@ -269,7 +269,7 @@ async def generate_chat_stream(
                     ],
                     usage=last_usage,
                 )
-                yield f"data: {tool_response.model_dump_json()}\n\n"
+                yield f"data: {tool_response.model_dump_json(exclude_none=True)}\n\n"
                 if chunk.stats is not None:
                     yield f": generation_stats {chunk.stats.model_dump_json()}\n\n"
                 yield "data: [DONE]\n\n"
@@ -283,7 +283,7 @@ async def generate_chat_stream(
                     chunk_response = chunk_response.model_copy(
                         update={"usage": last_usage}
                     )
-                yield f"data: {chunk_response.model_dump_json()}\n\n"
+                yield f"data: {chunk_response.model_dump_json(exclude_none=True)}\n\n"
 
                 if chunk.finish_reason is not None:
                     if chunk.stats is not None:
@@ -379,5 +379,5 @@ async def collect_chat_response(
             )
         ],
         usage=last_usage,
-    ).model_dump_json()
+    ).model_dump_json(exclude_none=True)
     return
diff --git a/src/exo/api/adapters/responses.py b/src/exo/api/adapters/responses.py
index d65db32d5..41ceab1ad 100644
--- a/src/exo/api/adapters/responses.py
+++ b/src/exo/api/adapters/responses.py
@@ -101,7 +101,7 @@ def _build_response_usage(usage: Usage) -> ResponseUsage:
 
 def _format_sse(event: ResponsesStreamEvent) -> str:
     """Format a streaming event as an SSE message."""
-    return f"event: {event.type}\ndata: {event.model_dump_json()}\n\n"
+    return f"event: {event.type}\ndata: {event.model_dump_json(exclude_none=True)}\n\n"
 
 
 def _extract_content(content: str | list[ResponseContentPart]) -> str:
@@ -471,7 +471,7 @@ async def collect_responses_response(
         output=output,
         output_text=accumulated_text,
         usage=usage,
-    ).model_dump_json()
+    ).model_dump_json(exclude_none=True)
     return
 
 
diff --git a/src/exo/api/tests/test_chat_completions_stream.py b/src/exo/api/tests/test_chat_completions_stream.py
new file mode 100644
index 000000000..2f718f167
--- /dev/null
+++ b/src/exo/api/tests/test_chat_completions_stream.py
@@ -0,0 +1,195 @@
+# pyright: reportAny=false
+"""Tests asserting OpenAI-spec wire shape for /v1/chat/completions deltas."""
+
+import json
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from exo.api.adapters.chat_completions import (
+    collect_chat_response,
+    generate_chat_stream,
+)
+from exo.api.types import (
+    CompletionTokensDetails,
+    PromptTokensDetails,
+    ToolCallItem,
+    Usage,
+)
+from exo.shared.types.chunks import (
+    ErrorChunk,
+    PrefillProgressChunk,
+    TokenChunk,
+    ToolCallChunk,
+)
+from exo.shared.types.common import CommandId, ModelId
+
+_TEST_MODEL = ModelId("test-model")
+_NULLABLE_DELTA_FIELDS = {"content", "refusal"}
+
+
+def _make_usage(prompt_tokens: int = 1, completion_tokens: int = 1) -> Usage:
+    return Usage(
+        prompt_tokens=prompt_tokens,
+        completion_tokens=completion_tokens,
+        total_tokens=prompt_tokens + completion_tokens,
+        prompt_tokens_details=PromptTokensDetails(),
+        completion_tokens_details=CompletionTokensDetails(),
+    )
+
+
+async def _stream(
+    chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk],
+) -> AsyncGenerator[
+    PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk, None
+]:
+    for chunk in chunks:
+        yield chunk
+
+
+def _parse_data_events(lines: list[str]) -> list[dict[str, Any]]:
+    events: list[dict[str, Any]] = []
+    for line in lines:
+        for sub in line.split("\n"):
+            if sub.startswith("data: ") and not sub.endswith("[DONE]"):
+                events.append(json.loads(sub[len("data: ") :]))
+    return events
+
+
+def _assert_delta_spec_compliant(delta: dict[str, Any]) -> None:
+    """Reject any null delta key the OpenAI spec doesn't allow to be null."""
+    for key, value in delta.items():
+        if value is None and key not in _NULLABLE_DELTA_FIELDS:
+            raise AssertionError(
+                f"delta.{key} is null but spec requires it to be absent or a value; "
+                f"full delta={delta!r}"
+            )
+
+
+class TestTokenStreamDeltaShape:
+    async def test_token_chunk_delta_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hello",
+                usage=None,
+            ),
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=2,
+                text=" world",
+                usage=_make_usage(),
+                finish_reason="stop",
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-token"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 2
+        for event in events:
+            delta = event["choices"][0]["delta"]
+            _assert_delta_spec_compliant(delta)
+            assert "tool_calls" not in delta or isinstance(delta["tool_calls"], list)
+            assert "function_call" not in delta
+            assert "name" not in delta
+            assert "tool_call_id" not in delta
+
+    async def test_thinking_chunk_delta_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hmm",
+                usage=None,
+                is_thinking=True,
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-thinking"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        delta = events[0]["choices"][0]["delta"]
+        _assert_delta_spec_compliant(delta)
+        assert delta.get("reasoning_content") == "Hmm"
+        assert "content" not in delta
+
+
+class TestToolCallStreamDeltaShape:
+    async def test_tool_call_chunk_delta_has_array_tool_calls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            ToolCallChunk(
+                model=_TEST_MODEL,
+                tool_calls=[
+                    ToolCallItem(id="call_1", name="get_weather", arguments="{}"),
+                ],
+                usage=_make_usage(),
+            ),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-tool"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        delta = events[0]["choices"][0]["delta"]
+        _assert_delta_spec_compliant(delta)
+        assert isinstance(delta["tool_calls"], list)
+        assert delta["tool_calls"][0]["function"]["name"] == "get_weather"
+
+
+class TestErrorStreamShape:
+    async def test_error_chunk_response_has_no_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            ErrorChunk(model=_TEST_MODEL, error_message="boom"),
+        ]
+        lines: list[str] = []
+        async for event in generate_chat_stream(
+            CommandId("test-cmd-err"), _stream(chunks)
+        ):
+            lines.append(event)
+
+        events = _parse_data_events(lines)
+        assert len(events) == 1
+        assert events[0]["error"]["message"] == "boom"
+        for value in events[0]["error"].values():
+            assert value is not None
+
+
+class TestNonStreamingResponseShape:
+    async def test_collected_response_message_has_no_disallowed_nulls(self):
+        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
+            TokenChunk(
+                model=_TEST_MODEL,
+                token_id=1,
+                text="Hello",
+                usage=_make_usage(),
+                finish_reason="stop",
+            ),
+        ]
+        parts: list[str] = []
+        async for part in collect_chat_response(
+            CommandId("test-cmd-nonstream"), _stream(chunks)
+        ):
+            parts.append(part)
+
+        assert len(parts) == 1
+        payload = json.loads(parts[0])
+        message = payload["choices"][0]["message"]
+        for key, value in message.items():
+            if value is None:
+                assert key in {"content", "refusal", "reasoning_content"}, (
+                    f"non-streaming message.{key} is null but spec disallows it"
+                )
+        assert "function_call" not in message
+        assert "name" not in message
+        assert "tool_call_id" not in message