diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index ab715d8b1..5ecb402d3 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -147,6 +147,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator result := "" lastEmittedCount := 0 sentInitialRole := false + sentReasoning := false hasChatDeltaToolCalls := false hasChatDeltaContent := false @@ -190,6 +191,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator }}, Object: "chat.completion.chunk", } + sentReasoning = true } // Stream content deltas (cleaned of reasoning tags) while no tool calls @@ -363,7 +365,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator functionResults = functions.ParseFunctionCall(cleanedResult, config.FunctionsConfig) } xlog.Debug("[ChatDeltas] final tool call decision", "tool_calls", len(functionResults), "text_content", *textContentToReturn) - noActionToRun := len(functionResults) > 0 && functionResults[0].Name == noAction || len(functionResults) == 0 + // noAction is a sentinel "just answer" pseudo-function — not a real + // tool call. Scan the whole slice rather than only index 0 so we + // don't drop a real tool call that happens to follow a noAction + // entry, and so the default branch isn't entered with only noAction + // entries to emit as tool_calls. + noActionToRun := !hasRealCall(functionResults, noAction) switch { case noActionToRun: @@ -377,108 +384,31 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing } - if sentInitialRole { - // Content was already streamed during the callback — just emit usage. 
- delta := &schema.Message{} - if reasoning != "" && extractor.Reasoning() == "" { - delta.Reasoning = &reasoning - } - responses <- schema.OpenAIResponse{ - ID: id, Created: created, Model: req.Model, - Choices: []schema.Choice{{Delta: delta, Index: 0}}, - Object: "chat.completion.chunk", - Usage: usage, - } - } else { - // Content was NOT streamed — send everything at once (fallback). - responses <- schema.OpenAIResponse{ - ID: id, Created: created, Model: req.Model, - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0}}, - Object: "chat.completion.chunk", - } - - result, err := handleQuestion(config, functionResults, extractor.CleanedContent(), prompt) - if err != nil { - xlog.Error("error handling question", "error", err) - return err - } - - delta := &schema.Message{Content: &result} - if reasoning != "" { - delta.Reasoning = &reasoning - } - responses <- schema.OpenAIResponse{ - ID: id, Created: created, Model: req.Model, - Choices: []schema.Choice{{Delta: delta, Index: 0}}, - Object: "chat.completion.chunk", - Usage: usage, + var result string + if !sentInitialRole { + var hqErr error + result, hqErr = handleQuestion(config, functionResults, extractor.CleanedContent(), prompt) + if hqErr != nil { + xlog.Error("error handling question", "error", hqErr) + return hqErr } } + for _, chunk := range buildNoActionFinalChunks( + id, req.Model, created, + sentInitialRole, sentReasoning, + result, reasoning, usage, + ) { + responses <- chunk + } default: - for i, ss := range functionResults { - name, args := ss.Name, ss.Arguments - toolCallID := ss.ID - if toolCallID == "" { - toolCallID = id - } - - if i < lastEmittedCount { - // Already emitted during streaming by the incremental - // JSON/XML parser — skip to avoid duplicate tool calls. - continue - } - - // Tool call not yet emitted — send name + args (two chunks). 
-					initialMessage := schema.OpenAIResponse{
-						ID:      id,
-						Created: created,
-						Model:   req.Model,
-						Choices: []schema.Choice{{
-							Delta: &schema.Message{
-								Role: "assistant",
-								ToolCalls: []schema.ToolCall{
-									{
-										Index: i,
-										ID:    toolCallID,
-										Type:  "function",
-										FunctionCall: schema.FunctionCall{
-											Name: name,
-										},
-									},
-								},
-							},
-							Index:        0,
-							FinishReason: nil,
-						}},
-						Object: "chat.completion.chunk",
-					}
-					responses <- initialMessage
-
-					responses <- schema.OpenAIResponse{
-						ID:      id,
-						Created: created,
-						Model:   req.Model,
-						Choices: []schema.Choice{{
-							Delta: &schema.Message{
-								Role:    "assistant",
-								Content: textContentToReturn,
-								ToolCalls: []schema.ToolCall{
-									{
-										Index: i,
-										ID:    toolCallID,
-										Type:  "function",
-										FunctionCall: schema.FunctionCall{
-											Arguments: args,
-										},
-									},
-								},
-							},
-							Index:        0,
-							FinishReason: nil,
-						}},
-						Object: "chat.completion.chunk",
-					}
+				for _, chunk := range buildDeferredToolCallChunks(
+					id, req.Model, created,
+					functionResults, lastEmittedCount,
+					sentInitialRole, *textContentToReturn,
+					sentReasoning, reasoning,
+				) {
+					responses <- chunk
 				}
 			}
 
diff --git a/core/http/endpoints/openai/chat_emit.go b/core/http/endpoints/openai/chat_emit.go
new file mode 100644
index 000000000..0418099a6
--- /dev/null
+++ b/core/http/endpoints/openai/chat_emit.go
@@ -0,0 +1,192 @@
+package openai
+
+import (
+	"fmt"
+
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/functions"
+)
+
+// hasRealCall reports whether functionResults contains at least one
+// entry whose Name is something other than the noAction sentinel.
+// Used by processTools to decide between the "answer the question"
+// path and the real tool-call flush. A nil or empty slice yields false.
+//
+// NOTE(review): this only selects the branch. When sentinel and real
+// entries are mixed, the call site passes the unfiltered slice on to
+// buildDeferredToolCallChunks, which emits sentinel entries like any
+// other call — confirm that is intended.
+func hasRealCall(functionResults []functions.FuncCallResults, noAction string) bool {
+	for _, fc := range functionResults {
+		if fc.Name != noAction {
+			return true
+		}
+	}
+	return false
+}
+
+// buildNoActionFinalChunks produces the closing SSE chunks for the
+// noActionToRun branch of processTools (i.e. the model chose the "answer"
+// pseudo-function or emitted no tool calls at all).
+//
+// When content was already streamed (contentAlreadyStreamed=true) the
+// helper emits a single trailing usage chunk, optionally carrying
+// reasoning that was produced but not streamed incrementally. When
+// content was not streamed it emits a role chunk followed by a
+// content+reasoning+usage chunk — the "send everything at once" fallback.
+//
+// Reasoning re-emission is guarded by reasoningAlreadyStreamed, not by
+// probing the extractor's Go-side state: the C++ autoparser delivers
+// reasoning through ProcessChatDeltaReasoning which populates a
+// separate accumulator that extractor.Reasoning() does not expose.
+// Without this guard the callback would stream reasoning incrementally
+// and the final chunk would duplicate it.
+//
+// Only the last returned chunk carries usage.
+func buildNoActionFinalChunks(
+	id, model string,
+	created int,
+	contentAlreadyStreamed bool,
+	reasoningAlreadyStreamed bool,
+	content string,
+	reasoning string,
+	usage schema.OpenAIUsage,
+) []schema.OpenAIResponse {
+	// newChunk wraps one delta in the envelope shared by every chunk.
+	newChunk := func(delta *schema.Message) schema.OpenAIResponse {
+		return schema.OpenAIResponse{
+			ID: id, Created: created, Model: model,
+			Choices: []schema.Choice{{Delta: delta, Index: 0}},
+			Object:  "chat.completion.chunk",
+		}
+	}
+
+	// The closing delta is common to both paths. content and reasoning are
+	// by-value parameters, so taking their address cannot alias the
+	// caller's variables.
+	final := &schema.Message{}
+	if reasoning != "" && !reasoningAlreadyStreamed {
+		final.Reasoning = &reasoning
+	}
+
+	out := make([]schema.OpenAIResponse, 0, 2)
+	if !contentAlreadyStreamed {
+		// Fallback: nothing reached the client yet, so open with the role
+		// chunk and deliver the whole answer in the closing delta.
+		out = append(out, newChunk(&schema.Message{Role: "assistant"}))
+		final.Content = &content
+	}
+
+	last := newChunk(final)
+	last.Usage = usage
+	return append(out, last)
+}
+
+// buildDeferredToolCallChunks produces the SSE chunks for tool calls that
+// were discovered only during final parsing (i.e. after the streaming
+// callback finished). The caller forwards every returned chunk to the
+// responses channel.
+//
+// Guarantees:
+//   - tool calls with i < lastEmittedCount are skipped (already streamed)
+//   - each emitted call yields two chunks: name-only, then args-only
+//   - no chunk ever carries both non-empty Content and non-empty ToolCalls
+//   - no chunk ever carries both non-empty Reasoning and non-empty ToolCalls
+//   - if !reasoningAlreadyStreamed && reasoningContent != "",
+//     a reasoning chunk is emitted first
+//   - if !contentAlreadyStreamed && textContent != "",
+//     a role chunk followed by a content chunk is emitted (after reasoning)
+//   - chunks order: [reasoning?] [role+content?] (name, args)+
+//   - fallback IDs for empty ss.ID are unique per index so a client can
+//     match tool_result messages back to the right call
+//   - nil is returned when no call is deferred, even if reasoning or
+//     content was never streamed
+func buildDeferredToolCallChunks(
+	id, model string,
+	created int,
+	functionResults []functions.FuncCallResults,
+	lastEmittedCount int,
+	contentAlreadyStreamed bool,
+	textContent string,
+	reasoningAlreadyStreamed bool,
+	reasoningContent string,
+) []schema.OpenAIResponse {
+	// Entries below lastEmittedCount were already emitted incrementally. If
+	// that covers the whole slice (or the slice is empty) there's nothing
+	// to flush — and no reason to emit a standalone reasoning/content chunk.
+	if len(functionResults) == 0 || lastEmittedCount >= len(functionResults) {
+		return nil
+	}
+
+	// Index of the first deferred call; a negative count skips nothing.
+	first := lastEmittedCount
+	if first < 0 {
+		first = 0
+	}
+
+	// newChunk wraps one delta in the envelope shared by every chunk.
+	newChunk := func(delta *schema.Message) schema.OpenAIResponse {
+		return schema.OpenAIResponse{
+			ID: id, Created: created, Model: model,
+			Choices: []schema.Choice{{Delta: delta, Index: 0}},
+			Object:  "chat.completion.chunk",
+		}
+	}
+
+	// toolCallDelta builds a delta holding exactly one tool-call fragment.
+	// It never sets Content or Reasoning: either they were streamed through
+	// the token callback earlier, or the leading chunks below deliver them.
+	toolCallDelta := func(index int, callID string, fn schema.FunctionCall) *schema.Message {
+		return &schema.Message{
+			Role: "assistant",
+			ToolCalls: []schema.ToolCall{{
+				Index:        index,
+				ID:           callID,
+				Type:         "function",
+				FunctionCall: fn,
+			}},
+		}
+	}
+
+	// Upper bound: reasoning + role + content + two chunks per deferred call.
+	out := make([]schema.OpenAIResponse, 0, 3+2*(len(functionResults)-first))
+
+	// Reasoning first — the callback path at processTools emits reasoning
+	// incrementally in its own chunks, but when the C++ autoparser only
+	// surfaces reasoning as a final aggregate the callback never sees it.
+	// Recover it here (no duplication: contentAlreadyStreamed and
+	// reasoningAlreadyStreamed track what the callback already sent).
+	if !reasoningAlreadyStreamed && reasoningContent != "" {
+		out = append(out, newChunk(&schema.Message{Reasoning: &reasoningContent}))
+	}
+
+	// Then content, when it wasn't streamed via the callback. Emit role
+	// and content in separate deltas — the OpenAI streaming contract
+	// forbids bundling content alongside tool_calls in one delta.
+	if !contentAlreadyStreamed && textContent != "" {
+		out = append(out,
+			newChunk(&schema.Message{Role: "assistant"}),
+			newChunk(&schema.Message{Content: &textContent}),
+		)
+	}
+
+	for i := first; i < len(functionResults); i++ {
+		call := functionResults[i]
+
+		toolCallID := call.ID
+		if toolCallID == "" {
+			// Unique per-index fallback so multiple empty-ID calls don't
+			// collide on the same request ID (clients match tool results
+			// back by tool_call_id).
+			toolCallID = fmt.Sprintf("%s-%d", id, i)
+		}
+
+		// Name chunk, then args chunk.
+		out = append(out,
+			newChunk(toolCallDelta(i, toolCallID, schema.FunctionCall{Name: call.Name})),
+			newChunk(toolCallDelta(i, toolCallID, schema.FunctionCall{Arguments: call.Arguments})),
+		)
+	}
+
+	return out
+}
diff --git a/core/http/endpoints/openai/chat_emit_test.go b/core/http/endpoints/openai/chat_emit_test.go
new file mode 100644
index 000000000..377d61c51
--- /dev/null
+++ b/core/http/endpoints/openai/chat_emit_test.go
@@ -0,0 +1,717 @@
+package openai
+
+import (
+	"fmt"
+
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/functions"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// contentOf extracts the string payload from a chunk's delta.Content,
+// transparently handling both *string and string underlying types so
+// assertions don't have to care which one the helper produced.
+func contentOf(ch schema.OpenAIResponse) string {
+	if len(ch.Choices) == 0 || ch.Choices[0].Delta == nil {
+		return ""
+	}
+	switch v := ch.Choices[0].Delta.Content.(type) {
+	case *string: // pointer form; a nil pointer reads as empty
+		if v == nil {
+			return ""
+		}
+		return *v
+	case string:
+		return v
+	default: // nil interface or any other dynamic type reads as empty
+		return ""
+	}
+}
+
+// reasoningOf returns the text behind the chunk's delta.Reasoning pointer, or
+// "" when the chunk has no choices, no delta, or a nil Reasoning.
+func reasoningOf(ch schema.OpenAIResponse) string {
+	if len(ch.Choices) == 0 || ch.Choices[0].Delta == nil {
+		return ""
+	}
+	r := ch.Choices[0].Delta.Reasoning
+	if r == nil {
+		return ""
+	}
+	return *r
+}
+
+// toolCallsOf returns the delta's ToolCalls, or nil when the chunk has no choices or no delta.
+func toolCallsOf(ch schema.OpenAIResponse) []schema.ToolCall {
+	if len(ch.Choices) == 0 || ch.Choices[0].Delta == nil {
+		return nil
+	}
+	return ch.Choices[0].Delta.ToolCalls
+}
+
+// expectSpecCompliant fails the running spec unless every chunk satisfies:
+//   - Object == "chat.completion.chunk"
+//   - Exactly one Choice with Index==0
+//   - No delta ever carries both non-empty Content and non-empty ToolCalls
+//   - No delta ever carries both non-empty Reasoning and non-empty ToolCalls
+func expectSpecCompliant(chunks []schema.OpenAIResponse) {
+	for i, ch := range chunks {
+		Expect(ch.Object).To(Equal("chat.completion.chunk"), "chunk[%d] Object", i)
+		Expect(ch.Choices).To(HaveLen(1), "chunk[%d] Choices length", i)
+		Expect(ch.Choices[0].Index).To(Equal(0), "chunk[%d] Choices[0].Index", i)
+
+		hasContent := contentOf(ch) != ""
+		hasReasoning := reasoningOf(ch) != ""
+		hasToolCalls := len(toolCallsOf(ch)) > 0
+
+		if hasContent && hasToolCalls {
+			Fail(fmt.Sprintf("chunk[%d] violates spec: Content and ToolCalls in same delta", i))
+		}
+		if hasReasoning && hasToolCalls {
+			Fail(fmt.Sprintf("chunk[%d] violates spec: Reasoning and ToolCalls in same delta", i))
+		}
+	}
+}
+
+// expectMetadata asserts every chunk's ID, Model and Created equal the given id, model and created.
+func expectMetadata(chunks []schema.OpenAIResponse, id, model string, created int) {
+	for i, ch := range chunks {
+		Expect(ch.ID).To(Equal(id), "chunk[%d] ID", i)
+		Expect(ch.Model).To(Equal(model), "chunk[%d] Model", i)
+		Expect(ch.Created).To(Equal(created), "chunk[%d] Created", i)
+	}
+}
+
+var _ = Describe("buildDeferredToolCallChunks", func() {
+	const (
+		testID      = "req"
+		testModel   = "test-model"
+		testCreated = 1700000000
+	)
+
+	Describe("Case A — primary bug: content already streamed, 1 deferred call", func() {
+		It("emits only the tool_call chunks, no Content anywhere", func() {
+			results := []functions.FuncCallResults{
+				{Name: "search", Arguments: `{"q":"x"}`, ID: "tc1"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0, // functionResults, lastEmittedCount
+				true, "Let me search…", // contentAlreadyStreamed, textContent
+				true, "", // reasoningAlreadyStreamed, reasoningContent
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(2), "two chunks: name, args")
+
+			// Name chunk: ID and function name only, arguments still empty.
+			tc0 := toolCallsOf(chunks[0])
+			Expect(tc0).To(HaveLen(1))
+			Expect(tc0[0].Index).To(Equal(0))
+			Expect(tc0[0].ID).To(Equal("tc1"))
+			Expect(tc0[0].FunctionCall.Name).To(Equal("search"))
+			Expect(tc0[0].FunctionCall.Arguments).To(BeEmpty())
+			Expect(contentOf(chunks[0])).To(BeEmpty())
+
+			// Args chunk — MUST NOT carry Content
+			tc1 := toolCallsOf(chunks[1])
+			Expect(tc1).To(HaveLen(1))
+			Expect(tc1[0].FunctionCall.Name).To(BeEmpty())
+			Expect(tc1[0].FunctionCall.Arguments).To(Equal(`{"q":"x"}`))
+			Expect(contentOf(chunks[1])).To(BeEmpty(),
+				"args chunk must not duplicate already-streamed content")
+		})
+	})
+
+	Describe("Case B — autoparser / content not streamed", func() {
+		It("emits role, content, then name+args", func() {
+			results := []functions.FuncCallResults{
+				{Name: "do", Arguments: "{}", ID: "tc1"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				false, "Here is my plan…",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(4), "role, content, name, args")
+
+			// Role chunk: assistant role only, no content and no tool calls.
+			Expect(chunks[0].Choices[0].Delta.Role).To(Equal("assistant"))
+			Expect(contentOf(chunks[0])).To(BeEmpty())
+			Expect(toolCallsOf(chunks[0])).To(BeEmpty())
+
+			// Content chunk: the deferred text, no tool calls.
+			Expect(contentOf(chunks[1])).To(Equal("Here is my plan…"))
+			Expect(toolCallsOf(chunks[1])).To(BeEmpty())
+
+			// Name + args chunks
+			Expect(toolCallsOf(chunks[2])).To(HaveLen(1))
+			Expect(toolCallsOf(chunks[2])[0].FunctionCall.Name).To(Equal("do"))
+			Expect(toolCallsOf(chunks[3])).To(HaveLen(1))
+			Expect(toolCallsOf(chunks[3])[0].FunctionCall.Arguments).To(Equal("{}"))
+		})
+	})
+
+	Describe("Case C — multiple deferred calls, content already streamed", func() {
+		It("emits (name, args) × 3 with no Content anywhere", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tcA"},
+				{Name: "b", Arguments: "{}", ID: "tcB"},
+				{Name: "c", Arguments: "{}", ID: "tcC"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				true, "some narration",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(6))
+
+			for i := 0; i < 3; i++ {
+				Expect(contentOf(chunks[2*i])).To(BeEmpty(),
+					"call #%d name chunk must not carry Content", i)
+				Expect(contentOf(chunks[2*i+1])).To(BeEmpty(),
+					"call #%d args chunk must not carry Content", i)
+				Expect(toolCallsOf(chunks[2*i])[0].Index).To(Equal(i))
+				Expect(toolCallsOf(chunks[2*i+1])[0].Index).To(Equal(i))
+			}
+			Expect(toolCallsOf(chunks[0])[0].FunctionCall.Name).To(Equal("a"))
+			Expect(toolCallsOf(chunks[2])[0].FunctionCall.Name).To(Equal("b"))
+			Expect(toolCallsOf(chunks[4])[0].FunctionCall.Name).To(Equal("c"))
+		})
+	})
+
+	Describe("Case D — partial incremental emission", func() {
+		It("emits only the deferred tail (call #1), skipping #0", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tc0"},
+				{Name: "b", Arguments: "{}", ID: "tc1"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 1,
+				true, "narration",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(2))
+			Expect(toolCallsOf(chunks[0])[0].Index).To(Equal(1))
+			Expect(toolCallsOf(chunks[0])[0].FunctionCall.Name).To(Equal("b"))
+			Expect(toolCallsOf(chunks[1])[0].Index).To(Equal(1))
+			Expect(toolCallsOf(chunks[1])[0].FunctionCall.Arguments).To(Equal("{}"))
+		})
+	})
+
+	Describe("Case E — all calls already emitted incrementally", func() {
+		It("emits nothing", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tc0"},
+				{Name: "b", Arguments: "{}", ID: "tc1"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 2,
+				true, "narration",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(BeEmpty())
+		})
+	})
+
+	Describe("Case F — content not streamed but textContent empty", func() {
+		It("emits only the tool call chunks, no leading role/content", func() {
+			results := []functions.FuncCallResults{
+				{Name: "x", Arguments: "{}", ID: "tcX"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				false, "",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(2))
+			Expect(toolCallsOf(chunks[0])[0].FunctionCall.Name).To(Equal("x"))
+			Expect(toolCallsOf(chunks[1])[0].FunctionCall.Arguments).To(Equal("{}"))
+		})
+	})
+
+	Describe("Case G — empty ss.ID falls back to a unique per-index ID", func() {
+		It("emits a deterministic per-index fallback", func() {
+			results := []functions.FuncCallResults{
+				{Name: "x", Arguments: "{}", ID: ""},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				true, "narration",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(2))
+			expectedID := fmt.Sprintf("%s-%d", testID, 0)
+			Expect(toolCallsOf(chunks[0])[0].ID).To(Equal(expectedID))
+			Expect(toolCallsOf(chunks[1])[0].ID).To(Equal(expectedID))
+		})
+	})
+
+	Describe("Case G2 — multiple empty IDs get distinct fallbacks", func() {
+		It("avoids the collision bug where every empty-ID call shared the request id", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: ""},
+				{Name: "b", Arguments: "{}", ID: ""},
+				{Name: "c", Arguments: "{}", ID: ""},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				true, "narration",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(6))
+
+			ids := map[string]int{}
+			for _, ch := range chunks {
+				for _, tc := range toolCallsOf(ch) {
+					ids[tc.ID]++
+				}
+			}
+			// Each call yields a name chunk + args chunk → each distinct ID
+			// should appear in exactly two chunks. Three distinct IDs
+			// overall.
+			Expect(ids).To(HaveLen(3), "three distinct per-index fallback IDs")
+			for id, n := range ids {
+				Expect(n).To(Equal(2), "ID %q should appear in exactly 2 chunks", id)
+			}
+		})
+	})
+
+	Describe("Case H — indices preserved across skip with multiple calls", func() {
+		It("emits Index fields matching functionResults positions", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tc0"},
+				{Name: "b", Arguments: "{}", ID: "tc1"},
+				{Name: "c", Arguments: "{}", ID: "tc2"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 1,
+				true, "narration",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(4))
+
+			Expect(toolCallsOf(chunks[0])[0].Index).To(Equal(1))
+			Expect(toolCallsOf(chunks[1])[0].Index).To(Equal(1))
+			Expect(toolCallsOf(chunks[2])[0].Index).To(Equal(2))
+			Expect(toolCallsOf(chunks[3])[0].Index).To(Equal(2))
+		})
+	})
+
+	Describe("Case I — explicit non-empty ID is preserved", func() {
+		It("does not touch ss.ID when it's already set", func() {
+			results := []functions.FuncCallResults{
+				{Name: "x", Arguments: "{}", ID: "abc123"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				true, "narration",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(2))
+			Expect(toolCallsOf(chunks[0])[0].ID).To(Equal("abc123"))
+			Expect(toolCallsOf(chunks[1])[0].ID).To(Equal("abc123"))
+		})
+	})
+
+	Describe("Case J — chunk-shape sanity", func() {
+		It("splits Name into the first chunk and Arguments into the second", func() {
+			results := []functions.FuncCallResults{
+				{Name: "x", Arguments: `{"k":"v"}`, ID: "tcX"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				true, "narration",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(2))
+
+			Expect(toolCallsOf(chunks[0])[0].FunctionCall.Name).To(Equal("x"))
+			Expect(toolCallsOf(chunks[0])[0].FunctionCall.Arguments).To(BeEmpty())
+
+			Expect(toolCallsOf(chunks[1])[0].FunctionCall.Name).To(BeEmpty())
+			Expect(toolCallsOf(chunks[1])[0].FunctionCall.Arguments).To(Equal(`{"k":"v"}`))
+		})
+	})
+
+	Describe("Case K — metadata propagation", func() {
+		It("stamps every chunk with the same id/model/created", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tcA"},
+				{Name: "b", Arguments: "{}", ID: "tcB"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				false, "hello",
+				true, "",
+			)
+
+			expectSpecCompliant(chunks)
+			expectMetadata(chunks, testID, testModel, testCreated)
+		})
+	})
+
+	Describe("Case L — Choices[0].Index == 0 invariant", func() {
+		It("is upheld across every branch the helper can take", func() {
+			scenarios := []struct {
+				name              string
+				functionResults   []functions.FuncCallResults
+				lastEmittedCount  int
+				contentStreamed   bool
+				text              string
+				reasoningStreamed bool
+				reasoning         string
+			}{
+				{"streamed-content-deferred-call",
+					[]functions.FuncCallResults{{Name: "a", Arguments: "{}"}},
+					0, true, "hi", true, ""},
+				{"unstreamed-content-deferred-call",
+					[]functions.FuncCallResults{{Name: "a", Arguments: "{}"}},
+					0, false, "hello", true, ""},
+				{"unstreamed-reasoning-and-content",
+					[]functions.FuncCallResults{{Name: "a", Arguments: "{}"}},
+					0, false, "hello", false, "thinking…"},
+				{"partial-incremental",
+					[]functions.FuncCallResults{
+						{Name: "a", Arguments: "{}"},
+						{Name: "b", Arguments: "{}"}},
+					1, true, "hi", true, ""},
+			}
+			for _, sc := range scenarios {
+				chunks := buildDeferredToolCallChunks(
+					testID, testModel, testCreated,
+					sc.functionResults, sc.lastEmittedCount,
+					sc.contentStreamed, sc.text,
+					sc.reasoningStreamed, sc.reasoning,
+				)
+				for i, ch := range chunks {
+					Expect(ch.Choices[0].Index).To(Equal(0),
+						"scenario %q chunk[%d] Choices[0].Index", sc.name, i)
+				}
+			}
+		})
+	})
+
+	Describe("Case M — spec compliance across every scenario", func() {
+		It("never mixes Content or Reasoning with ToolCalls in a single delta", func() {
+			scenarios := []struct {
+				name              string
+				functionResults   []functions.FuncCallResults
+				lastEmittedCount  int
+				contentStreamed   bool
+				text              string
+				reasoningStreamed bool
+				reasoning         string
+			}{
+				{"A", []functions.FuncCallResults{{Name: "a", Arguments: "{}", ID: "tc"}},
+					0, true, "already-streamed", true, ""},
+				{"C", []functions.FuncCallResults{
+					{Name: "a", Arguments: "{}", ID: "tc0"},
+					{Name: "b", Arguments: "{}", ID: "tc1"}},
+					0, true, "already-streamed", true, ""},
+				{"B", []functions.FuncCallResults{{Name: "a", Arguments: "{}", ID: "tc"}},
+					0, false, "plan", true, ""},
+				{"Reasoning-deferred", []functions.FuncCallResults{{Name: "a", Arguments: "{}", ID: "tc"}},
+					0, false, "plan", false, "thinking…"},
+			}
+			for _, sc := range scenarios {
+				chunks := buildDeferredToolCallChunks(
+					testID, testModel, testCreated,
+					sc.functionResults, sc.lastEmittedCount,
+					sc.contentStreamed, sc.text,
+					sc.reasoningStreamed, sc.reasoning,
+				)
+				for i, ch := range chunks {
+					hasContent := contentOf(ch) != ""
+					hasReasoning := reasoningOf(ch) != ""
+					hasToolCalls := len(toolCallsOf(ch)) > 0
+					Expect(hasContent && hasToolCalls).To(BeFalse(),
+						"scenario %q chunk[%d] mixes Content with ToolCalls", sc.name, i)
+					Expect(hasReasoning && hasToolCalls).To(BeFalse(),
+						"scenario %q chunk[%d] mixes Reasoning with ToolCalls", sc.name, i)
+				}
+			}
+		})
+	})
+
+	Describe("Case N — empty functionResults", func() {
+		It("emits nothing, including no leading role/content/reasoning", func() {
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				nil, 0,
+				false, "ignored",
+				false, "ignored",
+			)
+			Expect(chunks).To(BeEmpty())
+		})
+	})
+
+	Describe("Case O — content not streamed but all calls already emitted", func() {
+		It("emits nothing, not even a standalone content chunk", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tc0"},
+				{Name: "b", Arguments: "{}", ID: "tc1"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 2,
+				false, "narration",
+				false, "thinking…",
+			)
+			Expect(chunks).To(BeEmpty(),
+				"no tool_calls to trigger on, so no leading role/content/reasoning either")
+		})
+	})
+
+	Describe("Reasoning — autoparser delivered reasoning only at end", func() {
+		It("emits a leading reasoning chunk when !reasoningAlreadyStreamed", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tc"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				true, "streamed content",
+				false, "model's private thoughts",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(3), "reasoning, name, args")
+
+			Expect(reasoningOf(chunks[0])).To(Equal("model's private thoughts"))
+			Expect(contentOf(chunks[0])).To(BeEmpty())
+			Expect(toolCallsOf(chunks[0])).To(BeEmpty())
+
+			// The following two are the tool_call name + args chunks.
+			Expect(toolCallsOf(chunks[1])[0].FunctionCall.Name).To(Equal("a"))
+			Expect(toolCallsOf(chunks[2])[0].FunctionCall.Arguments).To(Equal("{}"))
+		})
+
+		It("emits reasoning before role+content when neither was streamed", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tc"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				false, "final plan",
+				false, "private thoughts",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(5), "reasoning, role, content, name, args")
+
+			Expect(reasoningOf(chunks[0])).To(Equal("private thoughts"))
+			Expect(chunks[1].Choices[0].Delta.Role).To(Equal("assistant"))
+			Expect(contentOf(chunks[2])).To(Equal("final plan"))
+			Expect(toolCallsOf(chunks[3])[0].FunctionCall.Name).To(Equal("a"))
+			Expect(toolCallsOf(chunks[4])[0].FunctionCall.Arguments).To(Equal("{}"))
+		})
+
+		It("does not re-emit reasoning that was already streamed", func() {
+			results := []functions.FuncCallResults{
+				{Name: "a", Arguments: "{}", ID: "tc"},
+			}
+			chunks := buildDeferredToolCallChunks(
+				testID, testModel, testCreated,
+				results, 0,
+				true, "streamed",
+				true, "already-sent reasoning",
+			)
+
+			expectSpecCompliant(chunks)
+			Expect(chunks).To(HaveLen(2), "only name + args; no reasoning re-emission")
+			for _, ch := range chunks {
+				Expect(reasoningOf(ch)).To(BeEmpty())
+			}
+		})
+	})
+})
+
+var _ = Describe("hasRealCall", func() {
+	const noAction = "answer"
+
+	It("returns false for nil and empty slices", func() {
+		Expect(hasRealCall(nil, noAction)).To(BeFalse())
+		Expect(hasRealCall([]functions.FuncCallResults{}, noAction)).To(BeFalse())
+	})
+
+	It("returns false when every entry is the noAction sentinel", func() {
+		results := []functions.FuncCallResults{
+			{Name: noAction, Arguments: `{"message":"hi"}`},
+			{Name: noAction, Arguments: `{"message":"hello"}`},
+		}
+		Expect(hasRealCall(results, noAction)).To(BeFalse())
+	})
+
+	It("returns true when only one entry is a real call", func() {
+		results := []functions.FuncCallResults{
+			{Name: "search", Arguments: "{}"},
+		}
+		Expect(hasRealCall(results, noAction)).To(BeTrue())
+	})
+
+	It("returns true when a real call follows a noAction entry", func() {
+		// This is the regression the follow-up fixes: the old
+		// functionResults[0].Name == noAction check would declare this
+		// noActionToRun and drop the real call entirely.
+		results := []functions.FuncCallResults{
+			{Name: noAction, Arguments: `{"message":"hi"}`},
+			{Name: "search", Arguments: "{}"},
+		}
+		Expect(hasRealCall(results, noAction)).To(BeTrue())
+	})
+
+	It("returns true when a real call precedes a noAction entry", func() {
+		results := []functions.FuncCallResults{
+			{Name: "search", Arguments: "{}"},
+			{Name: noAction, Arguments: `{"message":"hi"}`},
+		}
+		Expect(hasRealCall(results, noAction)).To(BeTrue())
+	})
+})
+
+var _ = Describe("buildNoActionFinalChunks", func() {
+	const (
+		testID      = "req"
+		testModel   = "test-model"
+		testCreated = 1700000000
+	)
+	usage := schema.OpenAIUsage{PromptTokens: 5, CompletionTokens: 7, TotalTokens: 12}
+
+	Describe("Content streamed — trailing usage chunk", func() {
+		It("emits just one chunk with usage, no content, no reasoning when reasoning was streamed", func() {
+			chunks := buildNoActionFinalChunks(
+				testID, testModel, testCreated,
+				true, true, // contentAlreadyStreamed, reasoningAlreadyStreamed
+				"", "already-streamed-reasoning", usage, // content, reasoning, usage
+			)
+
+			Expect(chunks).To(HaveLen(1))
+			Expect(chunks[0].Usage.TotalTokens).To(Equal(12))
+			Expect(contentOf(chunks[0])).To(BeEmpty())
+			Expect(reasoningOf(chunks[0])).To(BeEmpty(),
+				"reasoning must not be re-emitted once it was streamed via the callback")
+		})
+
+		It("emits a trailing reasoning delivery when reasoning came only at end", func() {
+			chunks := buildNoActionFinalChunks(
+				testID, testModel, testCreated,
+				true, false,
+				"", "autoparser final reasoning", usage,
+			)
+
+			Expect(chunks).To(HaveLen(1))
+			Expect(reasoningOf(chunks[0])).To(Equal("autoparser final reasoning"))
+			Expect(contentOf(chunks[0])).To(BeEmpty())
+			Expect(chunks[0].Usage.TotalTokens).To(Equal(12))
+		})
+
+		It("omits reasoning when it's empty regardless of streamed flag", func() {
+			chunks := buildNoActionFinalChunks(
+				testID, testModel, testCreated,
+				true, false,
+				"", "", usage,
+			)
+
+			Expect(chunks).To(HaveLen(1))
+			Expect(reasoningOf(chunks[0])).To(BeEmpty())
+		})
+	})
+
+	Describe("Content not streamed — role, then content+usage", func() {
+		It("emits role chunk then content chunk without reasoning when reasoning was streamed", func() {
+			chunks := buildNoActionFinalChunks(
+				testID, testModel, testCreated,
+				false, true,
+				"the answer", "already-streamed-reasoning", usage,
+			)
+
+			Expect(chunks).To(HaveLen(2))
+			Expect(chunks[0].Choices[0].Delta.Role).To(Equal("assistant"))
+			Expect(contentOf(chunks[0])).To(BeEmpty())
+
+			Expect(contentOf(chunks[1])).To(Equal("the answer"))
+			Expect(reasoningOf(chunks[1])).To(BeEmpty(),
+				"reasoning must not be re-emitted if it was streamed earlier")
+			Expect(chunks[1].Usage.TotalTokens).To(Equal(12))
+		})
+
+		It("emits role, then content+reasoning when reasoning was not streamed", func() {
+			chunks := buildNoActionFinalChunks(
+				testID, testModel, testCreated,
+				false, false,
+				"the answer", "autoparser final reasoning", usage,
+			)
+
+			Expect(chunks).To(HaveLen(2))
+			Expect(chunks[0].Choices[0].Delta.Role).To(Equal("assistant"))
+
+			Expect(contentOf(chunks[1])).To(Equal("the answer"))
+			Expect(reasoningOf(chunks[1])).To(Equal("autoparser final reasoning"))
+			Expect(chunks[1].Usage.TotalTokens).To(Equal(12))
+		})
+
+		It("still emits content even when reasoning is empty", func() {
+			chunks := buildNoActionFinalChunks(
+				testID, testModel, testCreated,
+				false, false,
+				"just an answer", "", usage,
+			)
+
+			Expect(chunks).To(HaveLen(2))
+			Expect(contentOf(chunks[1])).To(Equal("just an answer"))
+			Expect(reasoningOf(chunks[1])).To(BeEmpty())
+		})
+	})
+
+	Describe("Metadata and shape invariants", func() {
+		It("stamps every chunk with the same id/model/created and object", func() {
+			chunks := buildNoActionFinalChunks(
+				testID, testModel, testCreated,
+				false, false,
+				"hi", "reasoning", usage,
+			)
+			for i, ch := range chunks {
+				Expect(ch.ID).To(Equal(testID), "chunk[%d] ID", i)
+				Expect(ch.Model).To(Equal(testModel), "chunk[%d] Model", i)
+				Expect(ch.Created).To(Equal(testCreated), "chunk[%d] Created", i)
+				Expect(ch.Object).To(Equal("chat.completion.chunk"), "chunk[%d] Object", i)
+				Expect(ch.Choices).To(HaveLen(1))
+				Expect(ch.Choices[0].Index).To(Equal(0))
+			}
+		})
+	})
+})