mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-07 08:16:53 -04:00
fix(streaming/tools): stop healing-marker stubs from gating off content
When the C++ autoparser is in pure-content fallback mode (e.g. qwen3
without --jinja) and the model emits a tool call as JSON, the streaming
worker calls ParseJSONIterative on each new chunk. parseJSONWithStack
heals partial input like `{` into `{"<marker>":1}` where <marker> is a
random integer. removeHealingMarkerFromJSON only stripped the marker
from values, so the synthetic key survived and downstream callers saw
a stub object with a random-looking key.
chat_stream_workers.go's JSON tool-call detector then bumped
lastEmittedCount past the stub even though no real tool call was
emitted, gating off ALL subsequent content chunks. The qwen3 + tools +
streaming case ended up dribbling only the first `{"` to clients and
then nothing, even when the model went on to call the noAction
`answer({"message": "…"})` pseudo-tool.
Three changes, each with its own regression test:
* removeHealingMarkerFromJSON now strips the marker suffix from keys
too, dropping the entry when the truncated key is empty. Inputs like
`{` no longer leak `{"<marker>":1}` to callers; partial keys like
`{ "code` still preserve the model-typed prefix `code`.
* ParseJSONIterative skips empty-after-healing maps so a healed `{`
doesn't surface as a stub result.
* The streaming JSON detector now breaks (not continues) on entries
without a usable `name`, and only bumps lastEmittedCount past
successfully-emitted entries. Defense-in-depth against any future
partial-parse shape.
The parser tests cover eight partial-JSON-prefix shapes and verify no
marker characters leak into keys, plus the two early shapes (`{`,
`{"`) that should not surface a stub at all.
Fixes #9988
Assisted-by: Claude:opus-4-7 [Read] [Edit] [Bash]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -263,13 +263,23 @@ func processStreamWithTools(
|
||||
} else {
|
||||
// Try JSON tool call parsing for streaming.
|
||||
// Only emit NEW tool calls (same guard as XML parser above).
|
||||
//
|
||||
// Issue #9988 defense: ParseJSONIterative may return stub objects
|
||||
// for partial input that has not yet committed a tool name (e.g.
|
||||
// `{"n` healed to `{"n":1}`). Treat any entry without a usable
|
||||
// `name` as "not yet a tool call" — break instead of continue, and
|
||||
// advance lastEmittedCount only past actually-emitted entries. The
|
||||
// previous version of this block set
|
||||
// `lastEmittedCount = len(jsonResults)` unconditionally, which
|
||||
// gated off ALL subsequent content emission as soon as one stub
|
||||
// landed in results (the qwen3 + streaming + tools "{\"" leak).
|
||||
jsonResults, jsonErr := functions.ParseJSONIterative(cleanedResult, true)
|
||||
if jsonErr == nil && len(jsonResults) > lastEmittedCount {
|
||||
for i := lastEmittedCount; i < len(jsonResults); i++ {
|
||||
jsonObj := jsonResults[i]
|
||||
name, ok := jsonObj["name"].(string)
|
||||
if !ok || name == "" {
|
||||
continue
|
||||
break
|
||||
}
|
||||
args := "{}"
|
||||
if argsVal, ok := jsonObj["arguments"]; ok {
|
||||
@@ -305,8 +315,8 @@ func processStreamWithTools(
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
responses <- initialMessage
|
||||
lastEmittedCount = i + 1
|
||||
}
|
||||
lastEmittedCount = len(jsonResults)
|
||||
}
|
||||
}
|
||||
return true
|
||||
|
||||
@@ -577,6 +577,21 @@ func trimPotentialPartialWord(content string, format *XMLToolCallFormat, startTh
|
||||
func removeHealingMarkerFromJSON(value map[string]any, marker string) map[string]any {
|
||||
result := make(map[string]any)
|
||||
for k, v := range value {
|
||||
// Strip the healing marker from KEYS. parseJSONWithStack appends the
|
||||
// marker to close a partial key (e.g. `{ "code` heals into
|
||||
// `{"code<marker>":1}`); we want to preserve the prefix the model
|
||||
// actually emitted. If the entire key was the marker (i.e. the input
|
||||
// was just `{`, which heals into `{"<marker>":1}`), the truncated key is
|
||||
// empty — drop the entry. Without this, downstream callers see a
|
||||
// stub object with a random integer-looking key and treat it as a
|
||||
// complete result, the shape that trips chat_stream_workers.go's
|
||||
// streaming tool-call detector in issue #9988.
|
||||
if idx := strings.Index(k, marker); idx != -1 {
|
||||
k = k[:idx]
|
||||
if k == "" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if str, ok := v.(string); ok {
|
||||
if idx := strings.Index(str, marker); idx != -1 {
|
||||
v = str[:idx]
|
||||
|
||||
@@ -325,7 +325,17 @@ func ParseJSONIterative(s string, isPartial bool) ([]map[string]any, error) {
|
||||
if jsonValue != nil {
|
||||
// Convert to map[string]any if it's an object, or handle arrays
|
||||
if obj, ok := jsonValue.(map[string]any); ok {
|
||||
results = append(results, obj)
|
||||
// Skip stub objects that healed away to nothing. Partial inputs
|
||||
// like `{` or `{"` go through parseJSONWithStack and
|
||||
// come back as `{"<marker>":1}`; after removeHealingMarkerFromJSON
|
||||
// drops the marker key the map is empty. Returning it as a
|
||||
// real result trips the streaming tool-call detector
|
||||
// (chat_stream_workers.go) into thinking a tool call landed,
|
||||
// gating off content emission for the rest of the stream
|
||||
// (issue #9988).
|
||||
if !(isPartialJSON && len(obj) == 0) {
|
||||
results = append(results, obj)
|
||||
}
|
||||
} else if arr, ok := jsonValue.([]any); ok {
|
||||
// Handle arrays: extract objects from array
|
||||
for _, item := range arr {
|
||||
|
||||
@@ -1782,6 +1782,101 @@ value
|
||||
// Results may be empty or contain partial data
|
||||
Expect(len(results)).To(BeNumerically(">=", 0))
|
||||
})
|
||||
|
||||
// Regression: https://github.com/mudler/LocalAI/issues/9988.
|
||||
// The streaming tool-call detector calls ParseJSONIterative on each
|
||||
// new content chunk. If the parser returns a stub object whose only
|
||||
// key is the synthetic healing marker, the caller treats it as
|
||||
// "tool call detected" and gates content emission — qwen3 with
|
||||
// streaming + tools used to leak only the first two characters of
|
||||
// the JSON ("{\"") to clients as a result.
|
||||
// Regression: https://github.com/mudler/LocalAI/issues/9988.
|
||||
// parseJSONWithStack inserts a random-integer healing marker into
|
||||
// keys (and sometimes values) to make a partial input parseable.
|
||||
// Those marker characters must never reach the caller — keys made
|
||||
// entirely of the marker must be dropped, and a marker suffix on a
|
||||
// partial key must be stripped down to the prefix the model
|
||||
// actually typed. Without this the streaming worker sees garbage
|
||||
// keys like `"4310046988783340008"` and mistakes the stub for a
|
||||
// completed tool call, then gates off content emission.
|
||||
DescribeTable("partial JSON starts must not surface healing markers in keys",
|
||||
func(input string) {
|
||||
parser := NewChatMsgParser(input, true)
|
||||
marker := parser.HealingMarker()
|
||||
results, err := ParseJSONIterative(input, true)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, obj := range results {
|
||||
for k := range obj {
|
||||
Expect(k).NotTo(ContainSubstring(marker),
|
||||
"healing marker leaked into key %q for input=%q (full=%+v)", k, input, obj)
|
||||
Expect(k).NotTo(MatchRegexp(`^[A-Za-z]?\d{6,}$`),
|
||||
"key %q looks like a synthetic numeric marker for input=%q (full=%+v)",
|
||||
k, input, obj)
|
||||
}
|
||||
}
|
||||
},
|
||||
Entry("just an opening brace", `{`),
|
||||
Entry("brace + quote", `{"`),
|
||||
Entry("brace + partial key", `{"n`),
|
||||
Entry("brace + quoted partial key", `{"na`),
|
||||
Entry("brace + complete key, no value yet", `{"name"`),
|
||||
Entry("brace + key + colon", `{"name":`),
|
||||
Entry("brace + key + opening quote of value", `{"name":"`),
|
||||
Entry("brace + partial value", `{"name":"ans`),
|
||||
)
|
||||
|
||||
DescribeTable("partial JSON that has not yet committed a tool name must not surface a stub object",
|
||||
// The streaming tool-call detector treats every entry returned
|
||||
// by ParseJSONIterative as a potential new tool call. For very
|
||||
// early partial inputs like `{` or `{"` there is nothing the
|
||||
// caller can act on yet — returning a stub object bumps
|
||||
// lastEmittedCount and gates off content emission.
|
||||
// (Partial-key results like `{"n` → `{"n": 1}` are OK at the
|
||||
// parser level — the streaming caller filters them by
|
||||
// requiring a usable `name` field. See the streaming
|
||||
// defense in chat_stream_workers.go.)
|
||||
func(input string) {
|
||||
results, err := ParseJSONIterative(input, true)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
Expect(results).To(BeEmpty(),
|
||||
"ParseJSONIterative(%q) should return no results — the partial input has no anchor", input)
|
||||
},
|
||||
Entry("just an opening brace", `{`),
|
||||
Entry("brace + quote", `{"`),
|
||||
)
|
||||
|
||||
It("returns a clean tool call once the JSON has a real name (issue #9988)", func() {
|
||||
results, err := ParseJSONIterative(`{"name":"answer","arguments":{"message":"Hi"}}`, true)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(results).To(HaveLen(1))
|
||||
Expect(results[0]).To(HaveKeyWithValue("name", "answer"))
|
||||
for k := range results[0] {
|
||||
Expect(k).NotTo(MatchRegexp(`^[A-Za-z]?\d{6,}$`),
|
||||
"healing marker leaked as key %q", k)
|
||||
}
|
||||
})
|
||||
|
||||
It("strips healing-marker keys even when a real name is present (issue #9988)", func() {
|
||||
// `{"name":"answer"` with no closing brace healed into a stub
|
||||
// with both `name:"answer"` AND a marker-only key. The marker
|
||||
// key must not surface.
|
||||
parser := NewChatMsgParser(`{"name":"answer"`, true)
|
||||
parser.SetHealingMarker("$marker$")
|
||||
jsonValue, isPartial, _, err := parser.TryConsumeJSON()
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(isPartial).To(BeTrue())
|
||||
obj, ok := jsonValue.(map[string]any)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(obj).To(HaveKeyWithValue("name", "answer"))
|
||||
for k := range obj {
|
||||
Expect(k).NotTo(ContainSubstring("$marker$"),
|
||||
"healing marker leaked into key %q", k)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Comprehensive JSON partial parsing tests (matching llama.cpp)", func() {
|
||||
|
||||
Reference in New Issue
Block a user