fix(streaming/tools): stop healing-marker stubs from gating off content

When the C++ autoparser is in pure-content fallback mode (e.g. qwen3
without --jinja) and the model emits a tool call as JSON, the streaming
worker calls ParseJSONIterative on each new chunk. parseJSONWithStack
heals partial input like `{` into `{"<marker>":1}` where <marker> is a
random integer. removeHealingMarkerFromJSON only stripped the marker
from values, so the synthetic key survived and downstream callers saw
a stub object with a random-looking key.

chat_stream_workers.go's JSON tool-call detector then bumped
lastEmittedCount past the stub even though no real tool call was
emitted, gating off ALL subsequent content chunks. The qwen3 + tools +
streaming case ended up dribbling only the first `{"` to clients and
then nothing, even when the model went on to call the noAction
`answer({"message": "…"})` pseudo-tool.

Three changes, each with its own regression test:

* removeHealingMarkerFromJSON now strips the marker suffix from keys
  too, dropping the entry when the truncated key is empty. Inputs like
  `{` no longer leak `{"<marker>":1}` to callers; partial keys like
  `{ "code` still preserve the model-typed prefix `code`.

* ParseJSONIterative skips empty-after-healing maps so a healed `{`
  doesn't surface as a stub result.

* The streaming JSON detector now breaks (not continues) on entries
  without a usable `name`, and only bumps lastEmittedCount past
  successfully-emitted entries. Defense-in-depth against any future
  partial-parse shape.

The parser tests cover eight partial-JSON-prefix shapes and verify no
marker characters leak into keys, plus the two early shapes (`{`,
`{"`) that should not surface a stub at all.

Fixes #9988

Assisted-by: Claude:opus-4-7 [Read] [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-05-25 20:41:19 +00:00
parent 9ff270eb65
commit 2c7f83d6a2
4 changed files with 133 additions and 3 deletions

View File

@@ -263,13 +263,23 @@ func processStreamWithTools(
} else {
// Try JSON tool call parsing for streaming.
// Only emit NEW tool calls (same guard as XML parser above).
//
// Issue #9988 defense: ParseJSONIterative may return stub objects
// for partial input that has not yet committed a tool name (e.g.
// `{"n` healed to `{"n":1}`). Treat any entry without a usable
// `name` as "not yet a tool call" — break instead of continue, and
// advance lastEmittedCount only past actually-emitted entries. The
// previous version of this block set
// `lastEmittedCount = len(jsonResults)` unconditionally, which
// gated off ALL subsequent content emission as soon as one stub
// landed in results (the qwen3 + streaming + tools "{\"" leak).
jsonResults, jsonErr := functions.ParseJSONIterative(cleanedResult, true)
if jsonErr == nil && len(jsonResults) > lastEmittedCount {
for i := lastEmittedCount; i < len(jsonResults); i++ {
jsonObj := jsonResults[i]
name, ok := jsonObj["name"].(string)
if !ok || name == "" {
continue
break
}
args := "{}"
if argsVal, ok := jsonObj["arguments"]; ok {
@@ -305,8 +315,8 @@ func processStreamWithTools(
Object: "chat.completion.chunk",
}
responses <- initialMessage
lastEmittedCount = i + 1
}
lastEmittedCount = len(jsonResults)
}
}
return true

View File

@@ -577,6 +577,21 @@ func trimPotentialPartialWord(content string, format *XMLToolCallFormat, startTh
func removeHealingMarkerFromJSON(value map[string]any, marker string) map[string]any {
result := make(map[string]any)
for k, v := range value {
// Strip the healing marker from KEYS. parseJSONWithStack appends the
// marker to close a partial key (e.g. `{ "code` heals into
// `{"code<marker>":1}`); we want to preserve the prefix the model
// actually emitted. If the entire key was the marker (i.e. the input
// was just `{`, which heals into `{"<marker>":1}`), the truncated key is
// empty — drop the entry. Without this, downstream callers see a
// stub object with a random integer-looking key and treat it as a
// complete result, the shape that trips chat_stream_workers.go's
// streaming tool-call detector in issue #9988.
if idx := strings.Index(k, marker); idx != -1 {
k = k[:idx]
if k == "" {
continue
}
}
if str, ok := v.(string); ok {
if idx := strings.Index(str, marker); idx != -1 {
v = str[:idx]

View File

@@ -325,7 +325,17 @@ func ParseJSONIterative(s string, isPartial bool) ([]map[string]any, error) {
if jsonValue != nil {
// Convert to map[string]any if it's an object, or handle arrays
if obj, ok := jsonValue.(map[string]any); ok {
results = append(results, obj)
// Skip stub objects that healed away to nothing. Partial inputs
// like `{` or `{"` go through parseJSONWithStack and come back as
// `{"<marker>":1}`; after removeHealingMarkerFromJSON drops the
// marker key the map is empty. (A partial key such as `{"n` keeps its
// typed prefix, so its map is not empty.) Returning the empty map as a
// real result trips the streaming tool-call detector
// (chat_stream_workers.go) into thinking a tool call landed,
// gating off content emission for the rest of the stream
// (issue #9988).
if !(isPartialJSON && len(obj) == 0) {
results = append(results, obj)
}
} else if arr, ok := jsonValue.([]any); ok {
// Handle arrays: extract objects from array
for _, item := range arr {

View File

@@ -1782,6 +1782,101 @@ value
// Results may be empty or contain partial data
Expect(len(results)).To(BeNumerically(">=", 0))
})
// Regression: https://github.com/mudler/LocalAI/issues/9988.
// The streaming tool-call detector calls ParseJSONIterative on each
// new content chunk. If the parser returns a stub object whose only
// key is the synthetic healing marker, the caller treats it as
// "tool call detected" and gates content emission — qwen3 with
// streaming + tools used to leak only the first two characters of
// the JSON ("{\"") to clients as a result.
// Key-level invariant for the same issue:
// parseJSONWithStack inserts a random-integer healing marker into
// keys (and sometimes values) to make a partial input parseable.
// Those marker characters must never reach the caller — keys made
// entirely of the marker must be dropped, and a marker suffix on a
// partial key must be stripped down to the prefix the model
// actually typed. Without this the streaming worker sees garbage
// keys like `"4310046988783340008"` and mistakes the stub for a
// completed tool call, then gates off content emission.
DescribeTable("partial JSON starts must not surface healing markers in keys",
func(input string) {
parser := NewChatMsgParser(input, true)
marker := parser.HealingMarker()
results, err := ParseJSONIterative(input, true)
if err != nil {
return
}
for _, obj := range results {
for k := range obj {
Expect(k).NotTo(ContainSubstring(marker),
"healing marker leaked into key %q for input=%q (full=%+v)", k, input, obj)
Expect(k).NotTo(MatchRegexp(`^[A-Za-z]?\d{6,}$`),
"key %q looks like a synthetic numeric marker for input=%q (full=%+v)",
k, input, obj)
}
}
},
Entry("just an opening brace", `{`),
Entry("brace + quote", `{"`),
Entry("brace + partial key", `{"n`),
Entry("brace + quoted partial key", `{"na`),
Entry("brace + complete key, no value yet", `{"name"`),
Entry("brace + key + colon", `{"name":`),
Entry("brace + key + opening quote of value", `{"name":"`),
Entry("brace + partial value", `{"name":"ans`),
)
DescribeTable("partial JSON that has not yet committed a tool name must not surface a stub object",
// The streaming tool-call detector treats every entry returned
// by ParseJSONIterative as a potential new tool call. For very
// early partial inputs like `{` or `{"` there is nothing the
// caller can act on yet — returning a stub object bumps
// lastEmittedCount and gates off content emission.
// (Partial-key results like `{"n` → `{"n": 1}` are OK at the
// parser level — the streaming caller filters them by
// requiring a usable `name` field. See the streaming
// defense in chat_stream_workers.go.)
func(input string) {
results, err := ParseJSONIterative(input, true)
if err != nil {
return
}
Expect(results).To(BeEmpty(),
"ParseJSONIterative(%q) should return no results — the partial input has no anchor", input)
},
Entry("just an opening brace", `{`),
Entry("brace + quote", `{"`),
)
It("returns a clean tool call once the JSON has a real name (issue #9988)", func() {
results, err := ParseJSONIterative(`{"name":"answer","arguments":{"message":"Hi"}}`, true)
Expect(err).NotTo(HaveOccurred())
Expect(results).To(HaveLen(1))
Expect(results[0]).To(HaveKeyWithValue("name", "answer"))
for k := range results[0] {
Expect(k).NotTo(MatchRegexp(`^[A-Za-z]?\d{6,}$`),
"healing marker leaked as key %q", k)
}
})
It("strips healing-marker keys even when a real name is present (issue #9988)", func() {
// `{"name":"answer"` with no closing brace healed into a stub
// with both `name:"answer"` AND a marker-only key. The marker
// key must not surface.
parser := NewChatMsgParser(`{"name":"answer"`, true)
parser.SetHealingMarker("$marker$")
jsonValue, isPartial, _, err := parser.TryConsumeJSON()
Expect(err).NotTo(HaveOccurred())
Expect(isPartial).To(BeTrue())
obj, ok := jsonValue.(map[string]any)
Expect(ok).To(BeTrue())
Expect(obj).To(HaveKeyWithValue("name", "answer"))
for k := range obj {
Expect(k).NotTo(ContainSubstring("$marker$"),
"healing marker leaked into key %q", k)
}
})
})
Describe("Comprehensive JSON partial parsing tests (matching llama.cpp)", func() {