fix: omit null delta fields in streaming chat completions (issue #2082) (#2092)

## Motivation

Streaming /v1/chat/completions responses emitted null for tool_calls,
function_call, name, and tool_call_id in every delta chunk. The OpenAI
streaming spec marks these fields as non-nullable — they must either
carry a real value or be absent entirely. Spec-correct clients doing
delta.get("tool_calls", []) receive None and crash with 'NoneType'
object is not iterable.

Root cause: the streaming serialisation path called model_dump_json()
without exclude_none=True, while the request-parsing path already used it
correctly. Four call sites in chat_completions.py (three streaming, one
non-streaming) and two in responses.py were affected.

## Testing

Before — every delta carries explicit nulls:

  $ curl -sN -X POST http://localhost:52415/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"model":"mlx-community/Qwen3.5-2B-MLX-8bit","messages":[{"role":"user","content":"hi"}],"max_tokens":3,"stream":true}' \
    | grep "^data: "
  data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":"Okay","name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":null,"usage":null}],"usage":null,"service_tier":null}
  data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":",","name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":null,"usage":null}],"usage":null,"service_tier":null}
  data: {"id":"7c4dae10-...","choices":[{"index":0,"delta":{"role":"assistant","content":"\nthe","reasoning_content":null,"name":null,"tool_calls":null,"tool_call_id":null,"function_call":null},"logprobs":null,"finish_reason":"length","usage":{"prompt_tokens":11,...}}],"usage":null,"service_tier":null}
  data: [DONE]

  After — only populated fields are emitted:
  data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"Okay"}}]}
  data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":","}}]}
  data: {"id":"demo","object":"chat.completion","created":...,"model":"mlx-community/Qwen3.5-2B-MLX-8bit","choices":[{"index":0,"delta":{"role":"assistant","content":"\nthe"},"finish_reason":"length"}],"usage":{"prompt_tokens":11,"completion_tokens":3,"total_tokens":14,...}}
  data: [DONE]
This commit is contained in:
Heidar
2026-05-14 17:32:54 +01:00
committed by GitHub
parent e8ec8d5010
commit 88d46d46fd
3 changed files with 201 additions and 6 deletions

View File

@@ -238,7 +238,7 @@ async def generate_chat_stream(
code=500,
)
)
yield f"data: {error_response.model_dump_json()}\n\n"
yield f"data: {error_response.model_dump_json(exclude_none=True)}\n\n"
yield "data: [DONE]\n\n"
return
@@ -269,7 +269,7 @@ async def generate_chat_stream(
],
usage=last_usage,
)
yield f"data: {tool_response.model_dump_json()}\n\n"
yield f"data: {tool_response.model_dump_json(exclude_none=True)}\n\n"
if chunk.stats is not None:
yield f": generation_stats {chunk.stats.model_dump_json()}\n\n"
yield "data: [DONE]\n\n"
@@ -283,7 +283,7 @@ async def generate_chat_stream(
chunk_response = chunk_response.model_copy(
update={"usage": last_usage}
)
yield f"data: {chunk_response.model_dump_json()}\n\n"
yield f"data: {chunk_response.model_dump_json(exclude_none=True)}\n\n"
if chunk.finish_reason is not None:
if chunk.stats is not None:
@@ -379,5 +379,5 @@ async def collect_chat_response(
)
],
usage=last_usage,
).model_dump_json()
).model_dump_json(exclude_none=True)
return

View File

@@ -101,7 +101,7 @@ def _build_response_usage(usage: Usage) -> ResponseUsage:
def _format_sse(event: ResponsesStreamEvent) -> str:
"""Format a streaming event as an SSE message."""
return f"event: {event.type}\ndata: {event.model_dump_json()}\n\n"
return f"event: {event.type}\ndata: {event.model_dump_json(exclude_none=True)}\n\n"
def _extract_content(content: str | list[ResponseContentPart]) -> str:
@@ -471,7 +471,7 @@ async def collect_responses_response(
output=output,
output_text=accumulated_text,
usage=usage,
).model_dump_json()
).model_dump_json(exclude_none=True)
return

View File

@@ -0,0 +1,195 @@
# pyright: reportAny=false
"""Tests asserting OpenAI-spec wire shape for /v1/chat/completions deltas."""
import json
from collections.abc import AsyncGenerator
from typing import Any
from exo.api.adapters.chat_completions import (
collect_chat_response,
generate_chat_stream,
)
from exo.api.types import (
CompletionTokensDetails,
PromptTokensDetails,
ToolCallItem,
Usage,
)
from exo.shared.types.chunks import (
ErrorChunk,
PrefillProgressChunk,
TokenChunk,
ToolCallChunk,
)
from exo.shared.types.common import CommandId, ModelId
_TEST_MODEL = ModelId("test-model")
_NULLABLE_DELTA_FIELDS = {"content", "refusal"}
def _make_usage(prompt_tokens: int = 1, completion_tokens: int = 1) -> Usage:
    """Build a ``Usage`` whose total is the sum of the two token counts.

    Both token-detail sub-objects are constructed with no arguments.
    """
    total = prompt_tokens + completion_tokens
    prompt_details = PromptTokensDetails()
    completion_details = CompletionTokensDetails()
    return Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=total,
        prompt_tokens_details=prompt_details,
        completion_tokens_details=completion_details,
    )
async def _stream(
    chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk],
) -> AsyncGenerator[
    PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk, None
]:
    """Replay ``chunks`` one by one, in list order, as an async generator."""
    # A plain loop is required here: ``yield from`` is not allowed inside an
    # async generator.
    for item in chunks:
        yield item
def _parse_data_events(lines: list[str]) -> list[dict[str, Any]]:
    """Decode the JSON payload of every SSE ``data:`` line found in ``lines``.

    Each element of ``lines`` may contain several newline-separated SSE
    lines. Lines that do not start with ``"data: "`` (comments, blank
    separators) are skipped, as is the terminating ``data: [DONE]`` sentinel.

    The sentinel is matched exactly rather than by suffix, so a mis-framed
    line that merely ends in ``[DONE]`` fails loudly in ``json.loads``
    instead of being dropped without notice.

    Args:
        lines: Raw strings as yielded by the stream under test.

    Returns:
        The decoded payloads, in the order they appear.

    Raises:
        json.JSONDecodeError: If a non-sentinel ``data:`` payload is not
            valid JSON.
    """
    prefix = "data: "
    events: list[dict[str, Any]] = []
    for line in lines:
        for sub in line.split("\n"):
            if not sub.startswith(prefix):
                continue
            payload = sub.removeprefix(prefix)
            if payload == "[DONE]":
                # End-of-stream marker carries no JSON body.
                continue
            events.append(json.loads(payload))
    return events
def _assert_delta_spec_compliant(delta: dict[str, Any]) -> None:
    """Fail if ``delta`` holds a null under a key that must not be null.

    Keys listed in ``_NULLABLE_DELTA_FIELDS`` may be null; every other key
    must either carry a non-null value or be left out of the delta.

    Raises:
        AssertionError: For the first offending key, in dict iteration order.
    """
    for key in delta:
        if delta[key] is not None or key in _NULLABLE_DELTA_FIELDS:
            continue
        raise AssertionError(
            f"delta.{key} is null but spec requires it to be absent or a value; "
            f"full delta={delta!r}"
        )
class TestTokenStreamDeltaShape:
    """Wire-shape checks for deltas streamed from ``TokenChunk`` input."""

    async def test_token_chunk_delta_has_no_disallowed_nulls(self):
        """Two text tokens yield two data events with no forbidden null keys."""
        first = TokenChunk(
            model=_TEST_MODEL,
            token_id=1,
            text="Hello",
            usage=None,
        )
        final = TokenChunk(
            model=_TEST_MODEL,
            token_id=2,
            text=" world",
            usage=_make_usage(),
            finish_reason="stop",
        )
        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
            first,
            final,
        ]

        sse = generate_chat_stream(CommandId("test-cmd-token"), _stream(chunks))
        lines: list[str] = [event async for event in sse]

        events = _parse_data_events(lines)
        assert len(events) == 2
        for event in events:
            delta = event["choices"][0]["delta"]
            _assert_delta_spec_compliant(delta)
            # tool_calls may be present only as a list; the rest must be absent.
            assert "tool_calls" not in delta or isinstance(delta["tool_calls"], list)
            for absent_key in ("function_call", "name", "tool_call_id"):
                assert absent_key not in delta

    async def test_thinking_chunk_delta_has_no_disallowed_nulls(self):
        """A thinking token surfaces as ``reasoning_content`` with no ``content`` key."""
        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
            TokenChunk(
                model=_TEST_MODEL,
                token_id=1,
                text="Hmm",
                usage=None,
                is_thinking=True,
            ),
        ]

        sse = generate_chat_stream(CommandId("test-cmd-thinking"), _stream(chunks))
        lines: list[str] = [event async for event in sse]

        events = _parse_data_events(lines)
        assert len(events) == 1
        (only_event,) = events
        delta = only_event["choices"][0]["delta"]
        _assert_delta_spec_compliant(delta)
        assert delta.get("reasoning_content") == "Hmm"
        assert "content" not in delta
class TestToolCallStreamDeltaShape:
    """Wire-shape checks for deltas streamed from ``ToolCallChunk`` input."""

    async def test_tool_call_chunk_delta_has_array_tool_calls(self):
        """A single tool-call chunk yields one event whose ``tool_calls`` is a list."""
        weather_call = ToolCallItem(id="call_1", name="get_weather", arguments="{}")
        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
            ToolCallChunk(
                model=_TEST_MODEL,
                tool_calls=[weather_call],
                usage=_make_usage(),
            ),
        ]

        sse = generate_chat_stream(CommandId("test-cmd-tool"), _stream(chunks))
        lines: list[str] = [event async for event in sse]

        events = _parse_data_events(lines)
        assert len(events) == 1
        delta = events[0]["choices"][0]["delta"]
        _assert_delta_spec_compliant(delta)
        tool_calls = delta["tool_calls"]
        assert isinstance(tool_calls, list)
        assert tool_calls[0]["function"]["name"] == "get_weather"
class TestErrorStreamShape:
    """Wire-shape checks for the event streamed from ``ErrorChunk`` input."""

    async def test_error_chunk_response_has_no_nulls(self):
        """The single error event carries the message and no null error fields."""
        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
            ErrorChunk(model=_TEST_MODEL, error_message="boom"),
        ]

        sse = generate_chat_stream(CommandId("test-cmd-err"), _stream(chunks))
        lines: list[str] = [event async for event in sse]

        events = _parse_data_events(lines)
        assert len(events) == 1
        error = events[0]["error"]
        assert error["message"] == "boom"
        # Only the values directly under "error" are checked for nulls.
        assert all(value is not None for value in error.values())
class TestNonStreamingResponseShape:
    """Wire-shape checks for the JSON body yielded by ``collect_chat_response``."""

    async def test_collected_response_message_has_no_disallowed_nulls(self):
        """The collected message may be null only in a small allow-list of keys."""
        chunks: list[PrefillProgressChunk | ErrorChunk | ToolCallChunk | TokenChunk] = [
            TokenChunk(
                model=_TEST_MODEL,
                token_id=1,
                text="Hello",
                usage=_make_usage(),
                finish_reason="stop",
            ),
        ]

        collected = collect_chat_response(
            CommandId("test-cmd-nonstream"), _stream(chunks)
        )
        parts: list[str] = [part async for part in collected]
        assert len(parts) == 1

        (body,) = parts
        message = json.loads(body)["choices"][0]["message"]

        nullable_keys = {"content", "refusal", "reasoning_content"}
        null_keys = [key for key, value in message.items() if value is None]
        for key in null_keys:
            assert key in nullable_keys, (
                f"non-streaming message.{key} is null but spec disallows it"
            )

        for absent_key in ("function_call", "name", "tool_call_id"):
            assert absent_key not in message