fix(toolcall): consider also literal \n between tags

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-03-01 11:20:46 +01:00
parent f49a8edd87
commit 1fc8ad854f
2 changed files with 34 additions and 6 deletions

View File

@@ -305,6 +305,16 @@ func AllSpace(s string) bool {
return strings.TrimSpace(s) == ""
}
// escapedNewlineStripper deletes the two-character escape sequences \n and \r
// (a backslash followed by 'n' or 'r') in one left-to-right pass.
// A single pass matters: deleting \n and then \r in two separate passes lets
// input such as `\` + `\n` + `r` collapse into a brand-new `\r` that the second
// pass would also delete, so non-whitespace text would be reported as blank.
// A strings.Replacer is safe for concurrent use, so it is built once and shared.
var escapedNewlineStripper = strings.NewReplacer(`\n`, "", `\r`, "")

// allSpaceOrEscapedNewlines reports whether s is empty or contains only whitespace
// and the literal two-character sequences \n and \r (as in escaped JSON or
// backtick strings).
// Used for XML tool-call prelude checks so that content with literal \n between
// tags is accepted like real newlines, matching behavior when input has actual newlines.
// Any other backslash sequence (for example \t) and a lone backslash count as
// non-whitespace.
func allSpaceOrEscapedNewlines(s string) bool {
	return strings.TrimSpace(escapedNewlineStripper.Replace(s)) == ""
}
// TryConsumeJSON attempts to consume a JSON value from the current position
// Returns the parsed JSON (can be object, array, or any JSON type), whether it's partial,
// and the jsonDumpMarker (non-empty if JSON was healed)
@@ -721,7 +731,7 @@ func (p *ChatMsgParser) TryConsumeXMLToolCalls(format *XMLToolCallFormat) (bool,
// No more scopes found, break
break
}
if !AllSpace(tc.Prelude) {
if !allSpaceOrEscapedNewlines(tc.Prelude) {
// Non-whitespace before scope_start, stop parsing
p.MoveTo(tc.Groups[0].Begin - len(tc.Prelude))
break
@@ -743,7 +753,7 @@ func (p *ChatMsgParser) TryConsumeXMLToolCalls(format *XMLToolCallFormat) (bool,
break
}
if !AllSpace(tc.Prelude) {
if !allSpaceOrEscapedNewlines(tc.Prelude) {
// Non-whitespace before tool_start, stop parsing
p.MoveTo(tc.Groups[0].Begin - len(tc.Prelude))
break
@@ -845,7 +855,7 @@ func (p *ChatMsgParser) TryConsumeXMLToolCalls(format *XMLToolCallFormat) (bool,
break
}
if !AllSpace(keyStart.Prelude) {
if !allSpaceOrEscapedNewlines(keyStart.Prelude) {
// Non-whitespace before key_start, stop parsing parameters
p.MoveTo(keyStart.Groups[0].Begin - len(keyStart.Prelude))
break
@@ -1009,7 +1019,7 @@ func (p *ChatMsgParser) TryConsumeXMLToolCalls(format *XMLToolCallFormat) (bool,
// Rewind to json_end and check if val_end follows
p.MoveTo(jsonEnd)
valEndSize, valEnd := tryFindValEnd()
if valEnd != nil && AllSpace(valEnd.Prelude) && jsonHealingMarker == "" {
if valEnd != nil && allSpaceOrEscapedNewlines(valEnd.Prelude) && jsonHealingMarker == "" {
// val_end follows JSON
if len(valEnd.Groups) > 0 {
matchedSize := valEnd.Groups[0].End - valEnd.Groups[0].Begin
@@ -1105,7 +1115,7 @@ func (p *ChatMsgParser) TryConsumeXMLToolCalls(format *XMLToolCallFormat) (bool,
return false, &ChatMsgPartialException{Message: "incomplete tool_call"}
}
if !AllSpace(toolEnd.Prelude) {
if !allSpaceOrEscapedNewlines(toolEnd.Prelude) {
return returnError(errors.New("non-whitespace before tool_end"), recovery)
}
@@ -1147,7 +1157,7 @@ func (p *ChatMsgParser) TryConsumeXMLToolCalls(format *XMLToolCallFormat) (bool,
break
}
break
} else if !AllSpace(tc.Prelude) {
} else if !allSpaceOrEscapedNewlines(tc.Prelude) {
// Non-whitespace before scope_end - this might be another scope_start
// Check if it's actually another scope_start
if format.ScopeStart != "" {

View File

@@ -1726,6 +1726,24 @@ value
// Arguments should contain partial flag
Expect(results[0].Arguments).To(ContainSubstring("key"))
})
// Regression case: free-form assistant text followed by an XML tool-call block
// whose tags are separated by real newline characters.
It("should return tool call when leading text precedes tool block (real newlines)", func() {
// Interpreted (double-quoted) literal: every \n below is an actual newline byte
// and \" is a plain double quote.
input := "The memory reclaimer functionality already exists! Let me examine the watchdog to understand how it works and what might need to be implemented for \"auto-fit\" vs unloading.\n\n<tool_call>\n<function=bash>\n<parameter=script>\ncd /root/worktrees/LocalAI/task_8562 && cat core/application/watchdog.go\n</parameter>\n</function>\n</tool_call>"
// NOTE(review): the third argument is true here but false in the sibling
// "literal \n between tags" case — confirm the difference is intentional.
results, err := ParseXMLIterative(input, nil, true)
Expect(err).NotTo(HaveOccurred())
Expect(results).NotTo(BeNil())
// Exactly one tool call must be extracted despite the leading prose.
Expect(results).To(HaveLen(1))
Expect(results[0].Name).To(Equal("bash"))
// The script parameter must survive into the serialized arguments.
Expect(results[0].Arguments).To(ContainSubstring("task_8562"))
})
// Regression case: same payload as the real-newline variant, but the separators
// between tags are the two-character sequence backslash + 'n' rather than newline bytes.
It("should return tool call when leading text precedes tool block (literal \\n between tags)", func() {
// Raw (backtick) literal: every \n below stays as a backslash followed by 'n',
// and the double quotes around auto-fit need no escaping.
input := `The memory reclaimer functionality already exists! Let me examine the watchdog to understand how it works and what might need to be implemented for "auto-fit" vs unloading.\n\n<tool_call>\n<function=bash>\n<parameter=script>\ncd /root/worktrees/LocalAI/task_8562 && cat core/application/watchdog.go\n</parameter>\n</function>\n</tool_call>`
// NOTE(review): the third argument is false here but true in the sibling
// "real newlines" case — confirm the difference is intentional.
results, err := ParseXMLIterative(input, nil, false)
Expect(err).NotTo(HaveOccurred())
Expect(results).NotTo(BeNil())
// Exactly one tool call must be extracted despite the leading prose.
Expect(results).To(HaveLen(1))
Expect(results[0].Name).To(Equal("bash"))
// The script parameter must survive into the serialized arguments.
Expect(results[0].Arguments).To(ContainSubstring("task_8562"))
})
})
Describe("ParseJSONIterative", func() {