From 2c7f83d6a27230bade17f1d5e9d1cd1b6776d45b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 25 May 2026 20:41:19 +0000 Subject: [PATCH] fix(streaming/tools): stop healing-marker stubs from gating off content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the C++ autoparser is in pure-content fallback mode (e.g. qwen3 without --jinja) and the model emits a tool call as JSON, the streaming worker calls ParseJSONIterative on each new chunk. parseJSONWithStack heals partial input like `{` into `{"<marker>":1}` where `<marker>` is a random integer. removeHealingMarkerFromJSON only stripped the marker from values, so the synthetic key survived and downstream callers saw a stub object with a random-looking key. chat_stream_workers.go's JSON tool-call detector then bumped lastEmittedCount past the stub even though no real tool call was emitted, gating off ALL subsequent content chunks. The qwen3 + tools + streaming case ended up dribbling only the first `{"` to clients and then nothing, even when the model went on to call the noAction `answer({"message": "…"})` pseudo-tool. Three changes, each with its own regression test: * removeHealingMarkerFromJSON now strips the marker suffix from keys too, dropping the entry when the truncated key is empty. Inputs like `{` no longer leak `{"<marker>":1}` to callers; partial keys like `{ "code` still preserve the model-typed prefix `code`. * ParseJSONIterative skips empty-after-healing maps so a healed `{` doesn't surface as a stub result. * The streaming JSON detector now breaks (not continues) on entries without a usable `name`, and only bumps lastEmittedCount past successfully-emitted entries. Defense-in-depth against any future partial-parse shape. The parser tests cover eight partial-JSON-prefix shapes and verify no marker characters leak into keys, plus the two early shapes (`{`, `{"`) that should not surface a stub at all. 
Fixes #9988 Assisted-by: Claude:opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Ettore Di Giacinto --- .../endpoints/openai/chat_stream_workers.go | 14 ++- pkg/functions/iterative_parser.go | 15 +++ pkg/functions/parse.go | 12 ++- pkg/functions/parse_test.go | 95 +++++++++++++++++++ 4 files changed, 133 insertions(+), 3 deletions(-) diff --git a/core/http/endpoints/openai/chat_stream_workers.go b/core/http/endpoints/openai/chat_stream_workers.go index e5eb2d1ca..b45733ea0 100644 --- a/core/http/endpoints/openai/chat_stream_workers.go +++ b/core/http/endpoints/openai/chat_stream_workers.go @@ -263,13 +263,23 @@ func processStreamWithTools( } else { // Try JSON tool call parsing for streaming. // Only emit NEW tool calls (same guard as XML parser above). + // + // Issue #9988 defense: ParseJSONIterative may return stub objects + // for partial input that has not yet committed a tool name (e.g. + // `{"n` healed to `{"n":1}`). Treat any entry without a usable + // `name` as "not yet a tool call" — break instead of continue, and + // advance lastEmittedCount only past actually-emitted entries. The + // previous version of this block set + // `lastEmittedCount = len(jsonResults)` unconditionally, which + // gated off ALL subsequent content emission as soon as one stub + // landed in results (the qwen3 + streaming + tools "{\"" leak). 
jsonResults, jsonErr := functions.ParseJSONIterative(cleanedResult, true) if jsonErr == nil && len(jsonResults) > lastEmittedCount { for i := lastEmittedCount; i < len(jsonResults); i++ { jsonObj := jsonResults[i] name, ok := jsonObj["name"].(string) if !ok || name == "" { - continue + break } args := "{}" if argsVal, ok := jsonObj["arguments"]; ok { @@ -305,8 +315,8 @@ func processStreamWithTools( Object: "chat.completion.chunk", } responses <- initialMessage + lastEmittedCount = i + 1 } - lastEmittedCount = len(jsonResults) } } return true diff --git a/pkg/functions/iterative_parser.go b/pkg/functions/iterative_parser.go index 20705560e..cfcf46812 100644 --- a/pkg/functions/iterative_parser.go +++ b/pkg/functions/iterative_parser.go @@ -577,6 +577,21 @@ func trimPotentialPartialWord(content string, format *XMLToolCallFormat, startTh func removeHealingMarkerFromJSON(value map[string]any, marker string) map[string]any { result := make(map[string]any) for k, v := range value { + // Strip the healing marker from KEYS. parseJSONWithStack appends the + // marker to close a partial key (e.g. `{ "code` heals into + // `{"code":1}`); we want to preserve the prefix the model + // actually emitted. If the entire key was the marker (i.e. the input + // was just `{` heals into `{"":1}`), the truncated key is + // empty — drop the entry. Without this, downstream callers see a + // stub object with a random integer-looking key and treat it as a + // complete result, the shape that trips chat_stream_workers.go's + // streaming tool-call detector in issue #9988. 
+ if idx := strings.Index(k, marker); idx != -1 { + k = k[:idx] + if k == "" { + continue + } + } if str, ok := v.(string); ok { if idx := strings.Index(str, marker); idx != -1 { v = str[:idx] diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index b4be65773..9f6fe6b5e 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -325,7 +325,17 @@ func ParseJSONIterative(s string, isPartial bool) ([]map[string]any, error) { if jsonValue != nil { // Convert to map[string]any if it's an object, or handle arrays if obj, ok := jsonValue.(map[string]any); ok { - results = append(results, obj) + // Skip stub objects that healed away to nothing. Partial inputs + // like `{`, `{"`, or `{"n` go through parseJSONWithStack and + // come back as `{"":1}`; after removeHealingMarkerFromJSON + // drops the marker key the map is empty. Returning it as a + // real result trips the streaming tool-call detector + // (chat_stream_workers.go) into thinking a tool call landed, + // gating off content emission for the rest of the stream + // (issue #9988). + if !(isPartialJSON && len(obj) == 0) { + results = append(results, obj) + } } else if arr, ok := jsonValue.([]any); ok { // Handle arrays: extract objects from array for _, item := range arr { diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go index 89c883f78..f5fba3010 100644 --- a/pkg/functions/parse_test.go +++ b/pkg/functions/parse_test.go @@ -1782,6 +1782,101 @@ value // Results may be empty or contain partial data Expect(len(results)).To(BeNumerically(">=", 0)) }) + + // Regression: https://github.com/mudler/LocalAI/issues/9988. + // The streaming tool-call detector calls ParseJSONIterative on each + // new content chunk. 
If the parser returns a stub object whose only + // key is the synthetic healing marker, the caller treats it as + // "tool call detected" and gates content emission — qwen3 with + // streaming + tools used to leak only the first two characters of + // the JSON ("{\"") to clients as a result. + // Regression: https://github.com/mudler/LocalAI/issues/9988. + // parseJSONWithStack inserts a random-integer healing marker into + // keys (and sometimes values) to make a partial input parseable. + // Those marker characters must never reach the caller — keys made + // entirely of the marker must be dropped, and a marker suffix on a + // partial key must be stripped down to the prefix the model + // actually typed. Without this the streaming worker sees garbage + // keys like `"4310046988783340008"` and mistakes the stub for a + // completed tool call, then gates off content emission. + DescribeTable("partial JSON starts must not surface healing markers in keys", + func(input string) { + parser := NewChatMsgParser(input, true) + marker := parser.HealingMarker() + results, err := ParseJSONIterative(input, true) + if err != nil { + return + } + for _, obj := range results { + for k := range obj { + Expect(k).NotTo(ContainSubstring(marker), + "healing marker leaked into key %q for input=%q (full=%+v)", k, input, obj) + Expect(k).NotTo(MatchRegexp(`^[A-Za-z]?\d{6,}$`), + "key %q looks like a synthetic numeric marker for input=%q (full=%+v)", + k, input, obj) + } + } + }, + Entry("just an opening brace", `{`), + Entry("brace + quote", `{"`), + Entry("brace + partial key", `{"n`), + Entry("brace + quoted partial key", `{"na`), + Entry("brace + complete key, no value yet", `{"name"`), + Entry("brace + key + colon", `{"name":`), + Entry("brace + key + opening quote of value", `{"name":"`), + Entry("brace + partial value", `{"name":"ans`), + ) + + DescribeTable("partial JSON that has not yet committed a tool name must not surface a stub object", + // The streaming tool-call 
detector treats every entry returned + // by ParseJSONIterative as a potential new tool call. For very + // early partial inputs like `{` or `{"` there is nothing the + // caller can act on yet — returning a stub object bumps + // lastEmittedCount and gates off content emission. + // (Partial-key results like `{"n` → `{"n": 1}` are OK at the + // parser level — the streaming caller filters them by + // requiring a usable `name` field. See the streaming + // defense in chat_stream_workers.go.) + func(input string) { + results, err := ParseJSONIterative(input, true) + if err != nil { + return + } + Expect(results).To(BeEmpty(), + "ParseJSONIterative(%q) should return no results — the partial input has no anchor", input) + }, + Entry("just an opening brace", `{`), + Entry("brace + quote", `{"`), + ) + + It("returns a clean tool call once the JSON has a real name (issue #9988)", func() { + results, err := ParseJSONIterative(`{"name":"answer","arguments":{"message":"Hi"}}`, true) + Expect(err).NotTo(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0]).To(HaveKeyWithValue("name", "answer")) + for k := range results[0] { + Expect(k).NotTo(MatchRegexp(`^[A-Za-z]?\d{6,}$`), + "healing marker leaked as key %q", k) + } + }) + + It("strips healing-marker keys even when a real name is present (issue #9988)", func() { + // `{"name":"answer"` with no closing brace healed into a stub + // with both `name:"answer"` AND a marker-only key. The marker + // key must not surface. 
+ parser := NewChatMsgParser(`{"name":"answer"`, true) + parser.SetHealingMarker("$marker$") + jsonValue, isPartial, _, err := parser.TryConsumeJSON() + Expect(err).NotTo(HaveOccurred()) + Expect(isPartial).To(BeTrue()) + obj, ok := jsonValue.(map[string]any) + Expect(ok).To(BeTrue()) + Expect(obj).To(HaveKeyWithValue("name", "answer")) + for k := range obj { + Expect(k).NotTo(ContainSubstring("$marker$"), + "healing marker leaked into key %q", k) + } + }) }) Describe("Comprehensive JSON partial parsing tests (matching llama.cpp)", func() {