Files
LocalAI/pkg/functions/iterative_parser.go
Ettore Di Giacinto 21c84f432f feat(function): Add tool streaming, XML Tool Call Parsing Support (#7865)
* feat(function): Add XML Tool Call Parsing Support

Extend the function parsing system in LocalAI to support XML-style tool calls, similar to how JSON tool calls are currently parsed. This will allow models that return XML format (like <tool_call><function=name><parameter=key>value</parameter></function></tool_call>) to be properly parsed alongside text content.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* thinking before tool calls, more strict support for corner cases with no tools

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Support streaming tools

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Iterative JSON

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Iterative parsing

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Consume JSON marker

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixup

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* add tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix pending TODOs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Don't run other parsing with ParseRegex

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-05 18:25:40 +01:00

1396 lines
41 KiB
Go

package functions
import (
"encoding/json"
"errors"
"fmt"
"math/rand"
"regexp"
"strings"
"unicode"
"unicode/utf8"
"github.com/mudler/xlog"
)
// ChatMsgPartialException is the error value the parser returns when it stops
// on input that looks unfinished rather than malformed (a recoverable,
// "wait for more tokens" condition).
type ChatMsgPartialException struct {
	// Message is the human-readable description reported by Error.
	Message string
}

// Error implements the error interface and returns Message unchanged.
func (ex *ChatMsgPartialException) Error() string {
	msg := ex.Message
	return msg
}
// StringRange identifies a slice of the parser input by byte offsets.
// Begin is the first byte of the range and End is one past the last byte,
// i.e. the range is used as input[Begin:End].
type StringRange struct {
	Begin, End int
}
// FindLiteralResult describes where a literal was located in the input.
type FindLiteralResult struct {
	// Prelude is the input text between the position the search started from
	// and the beginning of the match.
	Prelude string
	// Groups holds the matched ranges. TryFindLiteral stores a single entry:
	// the span of the literal, or, for a partial match, the span from the
	// start of the partial literal to the end of the input.
	Groups []StringRange
}
// ChatMsgParser is a cursor-based parser modelled on llama.cpp's
// common_chat_msg_parser. It remembers how far into the input it has read and
// accumulates content, reasoning text and tool calls as it goes; it can also
// run in partial mode, where the input may stop mid-construct.
//
// The struct embeds strings.Builder values, so it should be handled through
// the pointer returned by NewChatMsgParser rather than copied.
type ChatMsgParser struct {
	// input is the complete text being parsed.
	input string
	// isPartial reports whether input may be an unfinished prefix of the
	// final message.
	isPartial bool
	// pos is the byte offset in input of the next unread byte.
	pos int
	// healingMarker is the sentinel string used when repairing truncated
	// JSON; NewChatMsgParser picks one that does not occur in input.
	healingMarker string
	// content accumulates text added through AddContent.
	content strings.Builder
	// reasoning accumulates text added through AddReasoningContent.
	reasoning strings.Builder
	// toolCalls lists the calls recorded through AddToolCall, in order.
	toolCalls []FuncCallResults
}
// NewChatMsgParser builds a parser positioned at the start of input.
// isPartial marks the input as possibly truncated (streaming). A healing
// marker that does not occur in input is generated up front, and the tool-call
// list starts out empty but non-nil.
func NewChatMsgParser(input string, isPartial bool) *ChatMsgParser {
	parser := new(ChatMsgParser) // pos starts at its zero value, 0
	parser.input = input
	parser.isPartial = isPartial
	parser.healingMarker = generateHealingMarker(input)
	parser.toolCalls = make([]FuncCallResults, 0)
	return parser
}
// generateHealingMarker returns a random decimal string that is guaranteed
// not to be a substring of input. Candidates are drawn until one qualifies.
func generateHealingMarker(input string) string {
	candidate := fmt.Sprintf("%d", rand.Int63())
	for strings.Contains(input, candidate) {
		candidate = fmt.Sprintf("%d", rand.Int63())
	}
	return candidate
}
// SetHealingMarker overrides the generated healing marker. It exists so tests
// can use a predictable value; no check is made that marker is absent from
// the input.
func (p *ChatMsgParser) SetHealingMarker(marker string) {
	p.healingMarker = marker
}
// Input returns the full string the parser was created with.
func (p *ChatMsgParser) Input() string {
	return p.input
}
// Pos returns the parser's current byte offset into the input.
func (p *ChatMsgParser) Pos() int {
	return p.pos
}
// IsPartial reports whether the parser was created in partial mode, i.e. the
// input may be cut off mid-construct.
func (p *ChatMsgParser) IsPartial() bool {
	return p.isPartial
}
// HealingMarker returns the sentinel string this parser uses when healing
// partial JSON.
func (p *ChatMsgParser) HealingMarker() string {
	return p.healingMarker
}
// MoveTo sets the cursor to the byte offset pos. Any offset from 0 through
// len(input) inclusive is accepted; anything else leaves the cursor unchanged
// and returns an error.
func (p *ChatMsgParser) MoveTo(pos int) error {
	if limit := len(p.input); pos >= 0 && pos <= limit {
		p.pos = pos
		return nil
	}
	return fmt.Errorf("invalid position: %d (input length: %d)", pos, len(p.input))
}
// MoveBack rewinds the cursor by n bytes.
//
// n must be between 0 and the current position. A larger n would move before
// the start of the input, and a negative n would silently move the cursor
// forward (possibly past the end of the input), so both are rejected with an
// error and the cursor is left where it was.
func (p *ChatMsgParser) MoveBack(n int) error {
	if n < 0 || n > p.pos {
		return fmt.Errorf("can't move back %d characters from position %d", n, p.pos)
	}
	p.pos -= n
	return nil
}
// Str returns input[rng.Begin:rng.End]. A range that is inverted, starts
// before 0 or ends past the input yields the empty string instead of
// panicking.
func (p *ChatMsgParser) Str(rng StringRange) string {
	inBounds := rng.Begin >= 0 && rng.End <= len(p.input)
	ordered := rng.Begin <= rng.End
	if inBounds && ordered {
		return p.input[rng.Begin:rng.End]
	}
	return ""
}
// ConsumeRest returns everything from the cursor to the end of the input and
// moves the cursor to the end. When the cursor is already at (or beyond) the
// end it returns "" and leaves the cursor untouched.
func (p *ChatMsgParser) ConsumeRest() string {
	end := len(p.input)
	if p.pos >= end {
		return ""
	}
	rest := p.input[p.pos:end]
	p.pos = end
	return rest
}
// AddContent appends content to the accumulated message content.
func (p *ChatMsgParser) AddContent(content string) {
	p.content.WriteString(content)
}
// AddReasoningContent appends reasoning to the accumulated reasoning text.
func (p *ChatMsgParser) AddReasoningContent(reasoning string) {
	p.reasoning.WriteString(reasoning)
}
// AddToolCall records a tool call with the given name and arguments string
// and reports whether it was recorded. A call with an empty name is rejected
// and false is returned.
//
// The id parameter is accepted but is not stored on the recorded entry.
func (p *ChatMsgParser) AddToolCall(name, id, arguments string) bool {
	if name != "" {
		call := FuncCallResults{
			Name:      name,
			Arguments: arguments,
		}
		p.toolCalls = append(p.toolCalls, call)
		return true
	}
	return false
}
// ToolCalls returns the tool calls recorded so far, in insertion order. The
// returned slice is the parser's own slice, not a copy.
func (p *ChatMsgParser) ToolCalls() []FuncCallResults {
	return p.toolCalls
}
// Content returns all text accumulated through AddContent.
func (p *ChatMsgParser) Content() string {
	return p.content.String()
}
// Reasoning returns all text accumulated through AddReasoningContent.
func (p *ChatMsgParser) Reasoning() string {
	return p.reasoning.String()
}
// rstrip returns s with every trailing Unicode whitespace rune removed.
func rstrip(s string) string {
	trailingSpace := func(r rune) bool { return unicode.IsSpace(r) }
	trimmed := strings.TrimRightFunc(s, trailingSpace)
	return trimmed
}
// eraseSpaces removes str[l:r] together with any ASCII whitespace directly
// before l and directly after r, and puts newlines in its place: one newline
// if the widened span is a single byte, two if it is longer, none if it is
// empty.
// Reference: llama.cpp/common/chat-parser-xml-toolcall.cpp lines 659-668
//
// l and r are byte offsets into str. If they are negative, exceed len(str) or
// are inverted, str and l are returned unchanged. Otherwise the rebuilt string
// is returned along with the widened l, advanced by 2 when it is non-zero.
//
// Whitespace is tested per byte against the ASCII set only. Converting a byte
// to a rune and asking unicode.IsSpace would also match 0x85 and 0xA0, which
// are continuation bytes in UTF-8 (for example the second byte of "Å" and
// "à"), and the widening loops would then cut a multi-byte character in half.
func eraseSpaces(str string, l, r int) (string, int) {
	if l < 0 || r < 0 || l > len(str) || r > len(str) || l > r {
		return str, l
	}
	isSpace := func(b byte) bool {
		switch b {
		case ' ', '\t', '\n', '\v', '\f', '\r':
			return true
		}
		return false
	}
	// Move l left to include leading spaces.
	for l > 0 && l < len(str) && isSpace(str[l-1]) {
		l--
	}
	// Move r right to include trailing spaces.
	for r < len(str) && isSpace(str[r]) {
		r++
	}
	// Replace the widened span with up to two newlines.
	result := str[:l]
	if l < r {
		result += "\n"
		if l+1 < r {
			result += "\n"
		}
	}
	newL := l
	if newL != 0 {
		newL += 2
	}
	if newL < len(str) && newL <= r {
		result += str[r:]
	} else if newL < len(str) {
		result += str[newL:]
	}
	return result, newL
}
// ClearTools discards every recorded tool call.
//
// A fresh empty slice is installed instead of re-slicing the old one to
// length zero: ToolCalls hands out the parser's slice directly, so reusing the
// backing array would let later AddToolCall calls overwrite entries in slices
// that callers obtained before the clear. The new slice is non-nil, matching
// what NewChatMsgParser sets up.
func (p *ChatMsgParser) ClearTools() {
	p.toolCalls = make([]FuncCallResults, 0)
}
// TryConsumeLiteral checks whether the input at the cursor starts with
// literal. On a match the cursor is advanced past it and true is returned;
// otherwise the cursor stays put and false is returned. An empty literal
// always matches and consumes nothing.
func (p *ChatMsgParser) TryConsumeLiteral(literal string) bool {
	n := len(literal)
	switch {
	case n == 0:
		return true
	case p.pos+n > len(p.input):
		// Not enough input left to hold the literal.
		return false
	case p.input[p.pos:p.pos+n] != literal:
		return false
	}
	p.pos += n
	return true
}
// ConsumeLiteral is the mandatory form of TryConsumeLiteral: it advances past
// literal when the input at the cursor starts with it, and otherwise returns
// a *ChatMsgPartialException naming the literal that was expected.
func (p *ChatMsgParser) ConsumeLiteral(literal string) error {
	if p.TryConsumeLiteral(literal) {
		return nil
	}
	return &ChatMsgPartialException{Message: fmt.Sprintf("Expected literal: %s", literal)}
}
// TryFindLiteral searches forward from the cursor for literal.
// Similar to llama.cpp's try_find_literal.
//
// When the literal is found, the cursor moves just past it and the result
// carries the skipped text as Prelude and the literal's span as Groups[0].
//
// When it is not found and the parser is in partial mode, the end of the
// input is checked for an unfinished prefix of literal. If there is one, the
// cursor moves to the end of the input and Groups[0] spans from the start of
// that prefix to the end of the input, so its width is shorter than
// len(literal).
//
// In every other case, including an empty literal, nil is returned and the
// cursor is unchanged.
func (p *ChatMsgParser) TryFindLiteral(literal string) *FindLiteralResult {
	if literal == "" {
		return nil
	}
	start := p.pos
	remaining := p.input[start:]

	if rel := strings.Index(remaining, literal); rel >= 0 {
		matchBegin := start + rel
		matchEnd := matchBegin + len(literal)
		found := &FindLiteralResult{
			Prelude: p.input[start:matchBegin],
			Groups:  []StringRange{{Begin: matchBegin, End: matchEnd}},
		}
		p.pos = matchEnd
		return found
	}

	if !p.isPartial {
		return nil
	}
	tail := stringFindPartialStop(remaining, literal)
	if tail < 0 {
		return nil
	}
	partial := &FindLiteralResult{
		Prelude: p.input[start : start+tail],
		Groups:  []StringRange{{Begin: start + tail, End: len(p.input)}},
	}
	p.pos = len(p.input)
	return partial
}
// stringFindPartialStop looks for the longest prefix of needle that s ends
// with and returns the byte offset in s where that prefix begins. It returns
// -1 when s ends with no prefix of needle, or when either string is empty.
// Used for streaming/partial parsing, where a literal may be cut off at the
// end of the text received so far.
func stringFindPartialStop(s, needle string) int {
	if needle == "" || s == "" {
		return -1
	}
	// A suffix of s cannot be longer than s itself.
	overlap := len(needle)
	if len(s) < overlap {
		overlap = len(s)
	}
	for ; overlap > 0; overlap-- {
		cut := len(s) - overlap
		if s[cut:] == needle[:overlap] {
			return cut
		}
	}
	return -1
}
// ConsumeSpaces advances the cursor over consecutive ASCII whitespace bytes
// (space, \t, \n, \v, \f, \r) and reports whether it moved.
//
// The test is made on raw bytes against the ASCII set only. Widening a byte
// to a rune and calling unicode.IsSpace would also accept 0x85 and 0xA0,
// which in UTF-8 are continuation bytes of ordinary characters, not
// whitespace.
func (p *ChatMsgParser) ConsumeSpaces() bool {
	start := p.pos
	for ; p.pos < len(p.input); p.pos++ {
		c := p.input[p.pos]
		// '\t', '\n', '\v', '\f' and '\r' are the contiguous bytes 9 through 13.
		if c != ' ' && (c < '\t' || c > '\r') {
			break
		}
	}
	return p.pos > start
}
// AllSpace reports whether s is empty or consists solely of Unicode
// whitespace.
func AllSpace(s string) bool {
	if s == "" {
		return true
	}
	return len(strings.TrimSpace(s)) == 0
}
// TryConsumeJSON tries to read one JSON object or array at the cursor, after
// skipping leading whitespace. Values that do not start with '{' or '[' are
// rejected with an error (see tryConsumeJSONPrimitive for scalars).
// Modelled on llama.cpp's try_consume_json().
//
// Results, in order:
//   - the decoded value;
//   - whether the JSON was incomplete;
//   - the dump marker reported by parseJSONWithStack when the value had to be
//     healed, "" otherwise;
//   - an error when nothing could be consumed.
//
// On success the cursor sits just after the consumed JSON, or at the end of
// the input when a truncated value was healed (partial mode only). On error
// the cursor is left after the skipped whitespace.
func (p *ChatMsgParser) TryConsumeJSON() (any, bool, string, error) {
	p.ConsumeSpaces()
	if p.pos >= len(p.input) {
		return nil, false, "", errors.New("end of input")
	}

	start := p.pos
	if lead := p.input[start]; lead != '{' && lead != '[' {
		return nil, false, "", errors.New("not a JSON object or array")
	}

	// Fast path: a streaming decoder reads exactly one value and tells us how
	// many bytes it used, so trailing text after the JSON is not a problem.
	var parsed any
	dec := json.NewDecoder(strings.NewReader(p.input[start:]))
	if decodeErr := dec.Decode(&parsed); decodeErr == nil {
		p.pos = start + int(dec.InputOffset())
		return parsed, false, "", nil
	}

	// Slow path: look for the bracket that closes the opening one, tracking
	// string literals and backslash escapes along the way.
	end := -1
	nesting := 0
	quoted, escaped := false, false
scan:
	for i := start; i < len(p.input); i++ {
		c := p.input[i]
		switch {
		case escaped:
			escaped = false
		case c == '\\':
			escaped = true
		case c == '"':
			quoted = !quoted
		case quoted:
			// Brackets inside a string literal do not count.
		case c == '{' || c == '[':
			nesting++
		case c == '}' || c == ']':
			nesting--
			if nesting == 0 {
				end = i + 1
				break scan
			}
		}
	}

	if end < 0 {
		// The value never closes. In partial mode, try to heal it.
		if p.isPartial {
			healed, wasHealed, dumpMarker, healErr := parseJSONWithStack(p.input[start:], p.healingMarker)
			if healErr == nil && wasHealed {
				result := removeHealingMarkerFromJSONAny(healed, p.healingMarker)
				p.pos = len(p.input)
				return result, true, dumpMarker, nil
			}
		}
		return nil, true, "", errors.New("incomplete JSON")
	}

	if err := json.Unmarshal([]byte(p.input[start:end]), &parsed); err != nil {
		return nil, false, "", err
	}
	p.pos = end
	return parsed, false, "", nil
}
// chatMsgJSONNumberPrefixRe matches a JSON-number-like token at the start of
// a string: [0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?. It is compiled once here
// rather than on every call.
var chatMsgJSONNumberPrefixRe = regexp.MustCompile(`^[0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?`)

// tryConsumeJSONPrimitive tries to read a JSON scalar (null, true, false or a
// number) at the cursor, after skipping leading whitespace. It is the
// fallback for values TryConsumeJSON rejects because they are not objects or
// arrays.
// Reference: llama.cpp/common/chat-parser-xml-toolcall.cpp lines 506-520
//
// The second result reports whether a primitive was consumed. It is needed to
// tell a parsed null (nil, true) from "nothing matched" (nil, false). On
// success the cursor is advanced past the token; on failure it is left after
// the skipped whitespace.
//
// A token only counts when it is followed by the end of the view or by a
// byte accepted by isJSONTerminator, so "nullable" or "12abc" do not match.
func (p *ChatMsgParser) tryConsumeJSONPrimitive() (any, bool) {
	p.ConsumeSpaces()
	if p.pos >= len(p.input) {
		return nil, false
	}
	// Work on a view that does not end in the middle of a UTF-8 sequence.
	safeView := utf8TruncateSafeView(p.input[p.pos:])

	// terminatedAt reports whether a token of n bytes at the start of
	// safeView is followed by end-of-view or a JSON terminator. Callers make
	// sure len(safeView) >= n.
	terminatedAt := func(n int) bool {
		return len(safeView) == n || isJSONTerminator(safeView[n])
	}

	switch {
	case strings.HasPrefix(safeView, "null"):
		if terminatedAt(len("null")) {
			p.pos += len("null")
			return nil, true
		}
	case strings.HasPrefix(safeView, "true"):
		if terminatedAt(len("true")) {
			p.pos += len("true")
			return true, true
		}
	case strings.HasPrefix(safeView, "false"):
		if terminatedAt(len("false")) {
			p.pos += len("false")
			return false, true
		}
	}

	if match := chatMsgJSONNumberPrefixRe.FindString(safeView); match != "" {
		var numValue float64
		// Sscanf rejects tokens the pattern lets through but that are not
		// numbers, such as a lone "-".
		if _, err := fmt.Sscanf(match, "%f", &numValue); err == nil && terminatedAt(len(match)) {
			p.pos += len(match)
			return numValue, true
		}
	}
	return nil, false
}
// isJSONTerminator reports whether ch may directly follow a JSON primitive:
// ASCII space, tab, newline, carriage return, ',', '}', ']', ':' or '<'.
func isJSONTerminator(ch byte) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	case ',', '}', ']', ':', '<':
		return true
	default:
		return false
	}
}
// utf8TruncateSafeView returns the longest prefix of s that is valid UTF-8
// after dropping at most three trailing bytes, which removes a multi-byte
// sequence cut off at the end. If none of those four candidates is valid, it
// falls back to dropping three bytes when s is longer than three bytes, and
// to "" otherwise.
// This is a helper function to avoid importing from parse.go
func utf8TruncateSafeView(s string) string {
	n := len(s)
	if n == 0 {
		return s
	}
	// Try the full string first, then 1, 2 and 3 bytes shorter, never going
	// down to the empty prefix.
	for dropped := 0; dropped < 4 && dropped < n; dropped++ {
		if prefix := s[:n-dropped]; utf8.ValidString(prefix) {
			return prefix
		}
	}
	if n > 3 {
		return s[:n-3]
	}
	return ""
}
// isJSONObjectOrArray reports whether v has the dynamic type encoding/json
// uses for a decoded object (map[string]any) or array ([]any).
func isJSONObjectOrArray(v any) bool {
	if _, isObject := v.(map[string]any); isObject {
		return true
	}
	_, isArray := v.([]any)
	return isArray
}
// isJSONString reports whether v holds a Go string, the type encoding/json
// uses for a decoded JSON string.
func isJSONString(v any) bool {
	switch v.(type) {
	case string:
		return true
	default:
		return false
	}
}
// trimPotentialPartialWord cuts off the end of content when that end could
// be the beginning of one of the format's markers (or of the think tags), so
// a half-received tag is not emitted while streaming.
// Reference: llama.cpp/common/chat-parser-xml-toolcall.cpp lines 684-692
//
// Only proper prefixes are considered: a suffix that equals a whole marker is
// left in place, and so is content that is, in its entirety, a prefix of a
// marker. Among all markers the earliest cut point wins. format must be
// non-nil.
func trimPotentialPartialWord(content string, format *XMLToolCallFormat, startThink, endThink string) string {
	markers := make([]string, 0, 13)
	markers = append(markers,
		startThink,
		endThink,
		format.ScopeStart,
		format.ToolStart,
		format.ToolSep,
		format.KeyStart,
		format.KeyValSep,
	)
	if sep2 := format.KeyValSep2; sep2 != nil {
		markers = append(markers, *sep2)
	}
	markers = append(markers, format.ValEnd)
	if lastVal := format.LastValEnd; lastVal != nil {
		markers = append(markers, *lastVal)
	}
	markers = append(markers, format.ToolEnd)
	if lastTool := format.LastToolEnd; lastTool != nil {
		markers = append(markers, *lastTool)
	}
	markers = append(markers, format.ScopeEnd)

	cut := len(content)
	for _, marker := range markers {
		if marker == "" {
			continue
		}
		// Suffixes as long as the marker (or as long as the whole content)
		// are deliberately excluded.
		floor := len(content) - len(marker)
		if floor < 0 {
			floor = 0
		}
		for at := len(content) - 1; at > floor; at-- {
			if at < cut && strings.HasPrefix(marker, content[at:]) {
				cut = at
			}
		}
	}
	return content[:cut]
}
// removeHealingMarkerFromJSON removes healing markers from a parsed JSON structure (objects only)
func removeHealingMarkerFromJSON(value map[string]any, marker string) map[string]any {
result := make(map[string]any)
for k, v := range value {
if str, ok := v.(string); ok {
if idx := strings.Index(str, marker); idx != -1 {
v = str[:idx]
}
} else if nestedMap, ok := v.(map[string]any); ok {
v = removeHealingMarkerFromJSON(nestedMap, marker)
}
result[k] = v
}
return result
}
// removeHealingMarkerFromJSONAny returns value with every string, at any
// nesting depth, cut at the first occurrence of marker. Objects
// (map[string]any) and arrays ([]any) are copied and cleaned recursively;
// object keys and non-string scalars are returned unchanged.
//
// Objects are walked here directly, so an array inside an object inside an
// array (and any deeper mix) is cleaned at every level.
func removeHealingMarkerFromJSONAny(value any, marker string) any {
	switch v := value.(type) {
	case map[string]any:
		cleaned := make(map[string]any, len(v))
		for key, item := range v {
			cleaned[key] = removeHealingMarkerFromJSONAny(item, marker)
		}
		return cleaned
	case []any:
		cleaned := make([]any, len(v))
		for i, item := range v {
			cleaned[i] = removeHealingMarkerFromJSONAny(item, marker)
		}
		return cleaned
	case string:
		if idx := strings.Index(v, marker); idx != -1 {
			return v[:idx]
		}
		return v
	default:
		return v
	}
}
// TryConsumeXMLToolCalls attempts to parse XML tool calls using the iterative parser,
// starting at the current position and recording each call through AddToolCall.
// Similar to llama.cpp's parse_xml_tool_calls
//
// Two families of formats are delegated to regex-based parsers instead of being
// walked here: Functionary (empty KeyStart with ToolStart "<function=") and
// JSON-like formats whose ToolStart contains {"name". For those, (true, nil) is
// returned when the delegate produced results and (false, nil) otherwise.
//
// For standard formats the result is:
//   - (len(p.toolCalls) > 0, nil) when parsing ran to completion. The count
//     covers every call held by the parser, not only those added by this call.
//   - (false, nil) with the position rewound to where this call started, when
//     a recoverable error occurs before anything has been committed.
//   - (false, *ChatMsgPartialException) when the input ends in the middle of a
//     construct. On most of these paths a tool call whose arguments are a
//     truncated JSON string has already been added for streaming.
//   - (false, error) when format is nil, required fields are missing, or an
//     error occurs once recovery is no longer allowed.
func (p *ChatMsgParser) TryConsumeXMLToolCalls(format *XMLToolCallFormat) (bool, error) {
if format == nil {
return false, errors.New("format is required")
}
// Handle Functionary format (JSON parameters inside XML tags) - use regex parser
if format.KeyStart == "" && format.ToolStart == "<function=" {
// Fall back to regex-based parser for Functionary format
results, err := parseFunctionaryFormat(p.input[p.pos:], format)
if err != nil || len(results) == 0 {
return false, nil
}
for _, result := range results {
p.AddToolCall(result.Name, "", result.Arguments)
}
return true, nil
}
// Handle JSON-like formats (Apriel-1.5, Xiaomi-MiMo) - use regex parser
if format.ToolStart != "" && strings.Contains(format.ToolStart, "{\"name\"") {
results, err := parseJSONLikeXMLFormat(p.input[p.pos:], format)
if err != nil || len(results) == 0 {
return false, nil
}
for _, result := range results {
p.AddToolCall(result.Name, "", result.Arguments)
}
return true, nil
}
// Validate required fields for standard XML formats
if format.ToolStart == "" || format.KeyStart == "" || format.KeyValSep == "" ||
format.ValEnd == "" || format.ToolEnd == "" {
return false, errors.New("required format fields missing")
}
startPos := p.pos
// recovery stays true until something has been committed (a tool call was
// added or a parameter key was accepted). While it is true, recoverable
// failures rewind to startPos instead of surfacing an error.
recovery := true
// Helper to return error with optional recovery
returnError := func(err error, canRecover bool) (bool, error) {
xlog.Debug("Failed to parse XML tool call", "error", err, "position", p.pos)
if canRecover && recovery {
p.MoveTo(startPos)
return false, nil
}
return false, fmt.Errorf("tool call parsing failed with unrecoverable errors: %w", err)
}
// Helper to find val_end or last_val_end
// Returns the length of the terminator that was chosen together with the
// match (nil when neither form was found). When both forms match, the one
// with the shorter prelude, i.e. the earlier one, wins.
tryFindValEnd := func() (int, *FindLiteralResult) {
savedPos := p.pos
tc := p.TryFindLiteral(format.ValEnd)
valEndSize := len(format.ValEnd)
if format.LastValEnd != nil {
p.MoveTo(savedPos)
tc2 := p.tryFind2LiteralSplitBySpaces(*format.LastValEnd, format.ToolEnd)
if format.LastToolEnd != nil {
p.MoveTo(savedPos)
tc3 := p.tryFind2LiteralSplitBySpaces(*format.LastValEnd, *format.LastToolEnd)
if tc3 != nil && (tc2 == nil || len(tc2.Prelude) > len(tc3.Prelude)) {
tc2 = tc3
}
}
if tc2 != nil && (tc == nil || len(tc.Prelude) > len(tc2.Prelude)) {
tc = tc2
if tc.Groups[0].End > len(p.input) {
tc.Groups[0].End = len(p.input)
}
// Narrow the group to last_val_end only, so the literal that follows it
// is left in the input for the next step.
if tc.Groups[0].Begin+len(*format.LastValEnd) < len(p.input) {
tc.Groups[0].End = tc.Groups[0].Begin + len(*format.LastValEnd)
}
p.MoveTo(tc.Groups[0].End)
valEndSize = len(*format.LastValEnd)
} else {
p.MoveTo(savedPos)
}
}
return valEndSize, tc
}
// Helper to find tool_end or last_tool_end
// Same contract as tryFindValEnd, for the tool terminator.
tryFindToolEnd := func() (int, *FindLiteralResult) {
savedPos := p.pos
tc := p.TryFindLiteral(format.ToolEnd)
toolEndSize := len(format.ToolEnd)
if format.LastToolEnd != nil {
p.MoveTo(savedPos)
tc2 := p.tryFind2LiteralSplitBySpaces(*format.LastToolEnd, format.ScopeEnd)
if tc2 != nil && (tc == nil || len(tc.Prelude) > len(tc2.Prelude)) {
tc = tc2
if tc.Groups[0].End > len(p.input) {
tc.Groups[0].End = len(p.input)
}
if tc.Groups[0].Begin+len(*format.LastToolEnd) < len(p.input) {
tc.Groups[0].End = tc.Groups[0].Begin + len(*format.LastToolEnd)
}
p.MoveTo(tc.Groups[0].End)
toolEndSize = len(*format.LastToolEnd)
} else {
p.MoveTo(savedPos)
}
}
return toolEndSize, tc
}
// Parse multiple scopes (for formats like qwen3-coder that can have multiple <tool_call> blocks)
// Continue parsing until no more scopes are found
for {
// Parse scope_start if present
if format.ScopeStart != "" && !AllSpace(format.ScopeStart) {
tc := p.TryFindLiteral(format.ScopeStart)
if tc == nil {
// No more scopes found, break
break
}
if !AllSpace(tc.Prelude) {
// Non-whitespace before scope_start, stop parsing
p.MoveTo(tc.Groups[0].Begin - len(tc.Prelude))
break
}
// Validate size match (partial detection)
// A match narrower than the literal means only a prefix of it was seen
// at the end of the input.
if len(tc.Groups) > 0 {
matchedSize := tc.Groups[0].End - tc.Groups[0].Begin
if matchedSize != len(format.ScopeStart) {
return false, &ChatMsgPartialException{Message: fmt.Sprintf("Partial literal: %s", format.ScopeStart)}
}
}
}
// Parse tool calls within this scope
scopeToolCallsFound := false
for {
tc := p.TryFindLiteral(format.ToolStart)
if tc == nil {
break
}
if !AllSpace(tc.Prelude) {
// Non-whitespace before tool_start, stop parsing
p.MoveTo(tc.Groups[0].Begin - len(tc.Prelude))
break
}
// Find function name
var funcName *FindLiteralResult
if AllSpace(format.ToolSep) {
// GLM 4.5 format: function name is between tool_start and key_start
funcName = p.TryFindLiteral(format.KeyStart)
} else {
// Standard format: function name is between tool_start and tool_sep
funcName = p.TryFindLiteral(format.ToolSep)
}
if funcName == nil {
// Try to find tool_end instead (empty tool call)
_, toolEnd := tryFindToolEnd()
if toolEnd != nil {
// Empty tool call - extract function name from between tool_start and tool_end
nameStart := tc.Groups[0].End
nameEnd := toolEnd.Groups[0].Begin
functionName := ""
if nameEnd > nameStart {
functionName = strings.TrimSpace(p.input[nameStart:nameEnd])
}
argsJSON, _ := json.Marshal(map[string]any{})
p.AddToolCall(functionName, "", string(argsJSON))
recovery = false
continue
}
// Partial tool name not supported
return false, &ChatMsgPartialException{Message: "incomplete tool_call"}
}
// Check if tool_end appears in function name prelude (empty tool call)
functionNamePrelude := funcName.Prelude
if strings.Contains(functionNamePrelude, format.ToolEnd) ||
(format.LastToolEnd != nil && strings.Contains(functionNamePrelude, *format.LastToolEnd)) {
// The separator that was found belongs to a later tool call: this one
// closes before it, so it has no parameters.
// Move back to start of tool_start and find tool_end
p.MoveTo(tc.Groups[0].Begin)
_, toolEnd := tryFindToolEnd()
if toolEnd != nil {
// Extract function name from between tool_start and tool_end
nameStart := tc.Groups[0].End
nameEnd := toolEnd.Groups[0].Begin
functionName := ""
if nameEnd > nameStart {
functionName = strings.TrimSpace(p.input[nameStart:nameEnd])
// Remove tool_sep if present
if !AllSpace(format.ToolSep) && strings.HasSuffix(functionName, format.ToolSep) {
functionName = strings.TrimSpace(functionName[:len(functionName)-len(format.ToolSep)])
}
}
argsJSON, _ := json.Marshal(map[string]any{})
p.AddToolCall(functionName, "", string(argsJSON))
recovery = false
continue
}
}
// Extract function name from prelude
// Move to appropriate position based on format
if AllSpace(format.ToolSep) {
// GLM 4.5 format: function name is on a separate line after tool_start, before key_start
// The prelude contains the function name
// Rewind to the start of key_start so the parameter loop below sees it.
p.MoveTo(funcName.Groups[0].Begin)
} else {
// Standard format: function name is before tool_sep
p.MoveTo(funcName.Groups[0].End)
}
functionName := strings.TrimSpace(funcName.Prelude)
// Handle Kimi-K2 function name stripping
// Drops the "functions." prefix and, when present, a trailing ":<digits>" suffix.
if strings.HasPrefix(functionName, "functions.") {
functionName = functionName[10:]
if idx := strings.LastIndex(functionName, ":"); idx != -1 {
suffix := functionName[idx+1:]
allDigits := true
for _, r := range suffix {
if r < '0' || r > '9' {
allDigits = false
break
}
}
if allDigits {
functionName = functionName[:idx]
}
}
}
// Parse arguments
arguments := make(map[string]any)
for {
keyStart := p.TryFindLiteral(format.KeyStart)
if keyStart == nil {
break
}
if !AllSpace(keyStart.Prelude) {
// Non-whitespace before key_start, stop parsing parameters
p.MoveTo(keyStart.Groups[0].Begin - len(keyStart.Prelude))
break
}
// Validate size match (partial detection)
if len(keyStart.Groups) > 0 {
matchedSize := keyStart.Groups[0].End - keyStart.Groups[0].Begin
if matchedSize != len(format.KeyStart) {
// Partial key_start, emit tool call with current args
// The closing brace is dropped so the arguments read as unfinished JSON.
argsJSON, _ := json.Marshal(arguments)
if len(argsJSON) > 0 && argsJSON[len(argsJSON)-1] == '}' {
argsJSON = argsJSON[:len(argsJSON)-1]
}
p.AddToolCall(functionName, "", string(argsJSON))
return false, &ChatMsgPartialException{Message: fmt.Sprintf("Partial literal: %s", format.KeyStart)}
}
}
// Find key_val_sep
keyValSep := p.TryFindLiteral(format.KeyValSep)
if keyValSep == nil {
// Generate partial args
// The unfinished key is stored with a flag suffix and the marshalled
// arguments are passed through partialJSON before being emitted.
rest := p.ConsumeRest()
arguments[rest+"XML_TOOL_CALL_PARTIAL_FLAG"] = ""
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
if cleaned, isPartial := partialJSON(toolStr); isPartial {
p.AddToolCall(functionName, "", cleaned)
} else {
p.AddToolCall(functionName, "", toolStr)
}
return false, &ChatMsgPartialException{
Message: fmt.Sprintf("Expected %s after %s", format.KeyValSep, format.KeyStart),
}
}
// Validate size match
if len(keyValSep.Groups) > 0 {
matchedSize := keyValSep.Groups[0].End - keyValSep.Groups[0].Begin
if matchedSize != len(format.KeyValSep) {
// Partial key_val_sep
rest := keyValSep.Prelude
arguments[rest+"XML_TOOL_CALL_PARTIAL_FLAG"] = ""
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
if cleaned, isPartial := partialJSON(toolStr); isPartial {
p.AddToolCall(functionName, "", cleaned)
} else {
p.AddToolCall(functionName, "", toolStr)
}
return false, &ChatMsgPartialException{Message: fmt.Sprintf("Partial literal: %s", format.KeyValSep)}
}
}
key := strings.TrimSpace(keyValSep.Prelude)
// A complete key has been read; failures from here on are not rewound.
recovery = false
// Handle key_val_sep2 if present (GLM 4.5 format)
// For GLM 4.5, key_val_sep2 is "</arg_key>\n<arg_value>"
// We need to consume it but it's optional - if not found, the value might be empty
if format.KeyValSep2 != nil {
// Try to consume it, but don't fail if not found (might be empty value)
p.TryConsumeLiteral(*format.KeyValSep2)
}
// Save position before attempting JSON parsing
// Reference: llama.cpp/common/chat-parser-xml-toolcall.cpp lines 499-555
valStart := p.pos
// Try to parse JSON first (if raw_argval is false/null)
// This matches llama.cpp's approach: try JSON before finding val_end
var jsonValue any
var jsonHealingMarker string
jsonParsed := false
if format.RawArgVal == nil || !*format.RawArgVal {
// Try JSON parsing (objects/arrays)
jsonVal, _, jsonDumpMarker, err := p.TryConsumeJSON()
if err == nil {
jsonValue = jsonVal
jsonHealingMarker = jsonDumpMarker
jsonParsed = true
} else {
// Try primitive fallback (null, true, false, numbers)
primitiveVal, found := p.tryConsumeJSONPrimitive()
if found {
jsonValue = primitiveVal
jsonParsed = true
} else {
// Reset position if JSON parsing failed
p.MoveTo(valStart)
}
}
}
// If JSON was parsed, check if val_end follows
if jsonParsed {
jsonEnd := p.pos
p.ConsumeSpaces()
// Check if at end of input (partial case)
if p.pos >= len(p.input) {
// Partial JSON - handle based on format and JSON type
if format.RawArgVal != nil && !*format.RawArgVal {
// raw_argval is false - only JSON allowed
if isJSONObjectOrArray(jsonValue) || isJSONString(jsonValue) {
arguments[key] = jsonValue
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
// Use jsonDumpMarker to cut precisely (matching llama.cpp lines 532-538)
if jsonHealingMarker != "" {
// Find jsonDumpMarker in the JSON string and cut there
// Matching llama.cpp: GGML_ASSERT(std::string::npos != json_str.rfind(...))
idx := strings.LastIndex(toolStr, jsonHealingMarker)
if idx != -1 {
toolStr = toolStr[:idx]
} else {
// Marker should always be found if it was returned from parseJSONWithStack
// Log warning but continue with fallback
jsonPreview := toolStr
if len(jsonPreview) > 100 {
jsonPreview = jsonPreview[:100]
}
xlog.Debug("jsonDumpMarker not found in JSON string, using fallback", "marker", jsonHealingMarker, "json", jsonPreview)
// Fallback: remove trailing } if present
if len(toolStr) > 0 && toolStr[len(toolStr)-1] == '}' {
toolStr = toolStr[:len(toolStr)-1]
}
}
} else {
// Remove trailing } if present (matching llama.cpp line 537)
if len(toolStr) > 0 && toolStr[len(toolStr)-1] == '}' {
toolStr = toolStr[:len(toolStr)-1]
}
}
p.AddToolCall(functionName, "", toolStr)
return false, &ChatMsgPartialException{
Message: "JSON arg_value detected. Waiting for more tokens for validations.",
}
}
}
// Generate partial args
// The value is replaced by the flag string before the arguments are
// marshalled and passed through partialJSON.
genPartialArgs := func(needle string) {
arguments[key] = needle
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
if cleaned, isPartial := partialJSON(toolStr); isPartial {
p.AddToolCall(functionName, "", cleaned)
} else {
p.AddToolCall(functionName, "", toolStr)
}
}
genPartialArgs("XML_TOOL_CALL_PARTIAL_FLAG")
return false, &ChatMsgPartialException{
Message: "JSON arg_value detected. Waiting for more tokens for validations.",
}
}
// Rewind to json_end and check if val_end follows
p.MoveTo(jsonEnd)
valEndSize, valEnd := tryFindValEnd()
// The JSON reading is kept only if nothing but whitespace separates it
// from val_end and the JSON did not have to be healed.
if valEnd != nil && AllSpace(valEnd.Prelude) && jsonHealingMarker == "" {
// val_end follows JSON
if len(valEnd.Groups) > 0 {
matchedSize := valEnd.Groups[0].End - valEnd.Groups[0].Begin
if matchedSize == valEndSize {
// Complete val_end - use JSON value
arguments[key] = jsonValue
} else {
// Partial val_end
genPartialArgs := func(needle string) {
arguments[key] = needle
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
if cleaned, isPartial := partialJSON(toolStr); isPartial {
p.AddToolCall(functionName, "", cleaned)
} else {
p.AddToolCall(functionName, "", toolStr)
}
}
genPartialArgs("XML_TOOL_CALL_PARTIAL_FLAG")
return false, &ChatMsgPartialException{
Message: fmt.Sprintf("Partial literal: %s", format.ValEnd),
}
}
}
} else {
// val_end doesn't follow - rewind and parse as text
p.MoveTo(valStart)
jsonParsed = false
}
}
// If JSON wasn't parsed or val_end didn't follow, parse as plain text
if !jsonParsed {
valEndSize, valEnd := tryFindValEnd()
if valEnd == nil {
// Partial value
rest := p.ConsumeRest()
if format.TrimRawArgVal {
rest = strings.TrimSpace(rest)
}
arguments[key] = rest + "XML_TOOL_CALL_PARTIAL_FLAG"
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
if cleaned, isPartial := partialJSON(toolStr); isPartial {
p.AddToolCall(functionName, "", cleaned)
} else {
p.AddToolCall(functionName, "", toolStr)
}
return false, &ChatMsgPartialException{
Message: fmt.Sprintf("Expected %s after %s", format.ValEnd, format.KeyValSep),
}
}
// Validate size match
if len(valEnd.Groups) > 0 {
matchedSize := valEnd.Groups[0].End - valEnd.Groups[0].Begin
if matchedSize != valEndSize {
// Partial val_end
rest := valEnd.Prelude
if format.TrimRawArgVal {
rest = strings.TrimSpace(rest)
}
arguments[key] = rest + "XML_TOOL_CALL_PARTIAL_FLAG"
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
if cleaned, isPartial := partialJSON(toolStr); isPartial {
p.AddToolCall(functionName, "", cleaned)
} else {
p.AddToolCall(functionName, "", toolStr)
}
return false, &ChatMsgPartialException{Message: fmt.Sprintf("Partial literal: %s", format.ValEnd)}
}
}
// Parse value using parseParameterValue to match regex parser behavior
// This handles JSON-first parsing correctly for text fallback
valueStr := strings.TrimSpace(valEnd.Prelude)
value := parseParameterValue(valueStr, format)
arguments[key] = value
}
}
// Find tool_end
toolEndSize, toolEnd := tryFindToolEnd()
if toolEnd == nil {
// Partial tool call
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
if len(toolStr) > 0 && toolStr[len(toolStr)-1] == '}' {
toolStr = toolStr[:len(toolStr)-1]
}
p.AddToolCall(functionName, "", toolStr)
return false, &ChatMsgPartialException{Message: "incomplete tool_call"}
}
if !AllSpace(toolEnd.Prelude) {
return returnError(errors.New("non-whitespace before tool_end"), recovery)
}
// Validate size match
if len(toolEnd.Groups) > 0 {
matchedSize := toolEnd.Groups[0].End - toolEnd.Groups[0].Begin
if matchedSize == toolEndSize {
// Complete tool call
argsJSON, _ := json.Marshal(arguments)
if !p.AddToolCall(functionName, "", string(argsJSON)) {
return false, &ChatMsgPartialException{Message: "Failed to add XML tool call"}
}
recovery = false
continue
}
}
// Partial tool_end
argsJSON, _ := json.Marshal(arguments)
toolStr := string(argsJSON)
if len(toolStr) > 0 && toolStr[len(toolStr)-1] == '}' {
toolStr = toolStr[:len(toolStr)-1]
}
p.AddToolCall(functionName, "", toolStr)
return false, &ChatMsgPartialException{Message: "incomplete tool_call"}
}
// Parse scope_end if present (for this scope)
if format.ScopeEnd != "" {
tc := p.TryFindLiteral(format.ScopeEnd)
if tc == nil {
// Expected scope_end but not found
if !p.isPartial {
// If we found tool calls in this scope, it's okay to not have scope_end
// (might be multiple scopes or incomplete)
// NOTE(review): scopeToolCallsFound is only assigned after a scope_end
// has been matched (below), so it is still false here on every
// iteration - confirm this is the intended condition.
if !scopeToolCallsFound {
return returnError(errors.New("expected scope_end"), recovery)
}
break
}
break
} else if !AllSpace(tc.Prelude) {
// Non-whitespace before scope_end - this might be another scope_start
// Check if it's actually another scope_start
if format.ScopeStart != "" {
// Check if the non-whitespace is actually another scope_start
testPos := tc.Groups[0].Begin - len(tc.Prelude)
if testPos >= 0 && testPos < len(p.input) {
testInput := p.input[testPos:]
if strings.HasPrefix(testInput, format.ScopeStart) {
// It's another scope_start, break to continue outer loop
p.MoveTo(testPos)
break
}
}
}
return returnError(errors.New("non-whitespace before scope_end"), recovery)
}
// Successfully found scope_end, continue to next scope if any
scopeToolCallsFound = true
} else {
// No scope_end defined, we're done after parsing tool calls
break
}
}
return len(p.toolCalls) > 0, nil
}
// ParseMsgWithXMLToolCalls parses the remaining input as a mix of plain
// content, reasoning ("think") blocks and XML-style tool calls.
//
// This matches llama.cpp's parse_msg_with_xml_tool_calls function.
// Reference: llama.cpp/common/chat-parser-xml-toolcall.cpp lines 654-872
//
// format describes the tool-call markers and must not be nil. startThink and
// endThink delimit reasoning blocks; when empty they default to "<think>" and
// "</think>".
//
// The parser repeatedly looks for the start of a tool call. Text in front of
// it is split into reasoning content (passed to AddReasoningContent) and
// regular content (passed to AddContent); the tool call itself is handed to
// TryConsumeXMLToolCalls. The loop ends once no further tool-call start is
// found.
//
// It returns an error when format is nil, or when TryConsumeXMLToolCalls
// fails with anything other than a *ChatMsgPartialException. Partial
// exceptions are not reported to the caller; the loop simply continues.
func (p *ChatMsgParser) ParseMsgWithXMLToolCalls(format *XMLToolCallFormat, startThink, endThink string) error {
	if format == nil {
		return errors.New("format is required")
	}

	// Default reasoning tags if not provided.
	if startThink == "" {
		startThink = "<think>"
	}
	if endThink == "" {
		endThink = "</think>"
	}

	// Trim leading spaces without affecting keyword matching.
	p.ConsumeSpaces()

	reasoningUnclosed := false // TODO: support thinking_forced_open from syntax
	unclosedReasoningContent := ""

	for {
		// Find scope_start + tool_start. Everything in front of the match is
		// candidate content; without a match the rest of the input is.
		tc := p.tryFind2LiteralSplitBySpaces(format.ScopeStart, format.ToolStart)
		var content string
		var toolCallStart string
		if tc != nil {
			content = tc.Prelude
			toolCallStart = p.Str(tc.Groups[0])
		} else {
			content = p.ConsumeRest()
			content = utf8TruncateSafeView(content)
		}

		// A think block opened in a previous iteration is still pending.
		if reasoningUnclosed {
			pos := strings.Index(content, endThink)
			if pos == -1 && p.pos != len(p.input) {
				// Still no end tag and more input follows: keep accumulating.
				unclosedReasoningContent += content
				if !(format.AllowToolcallInThink && tc != nil) {
					// The tool-call marker is treated as reasoning text.
					unclosedReasoningContent += toolCallStart
					continue
				}
			} else {
				reasoningUnclosed = false
				var reasoningContent string
				if pos == -1 {
					reasoningContent = content
					content = ""
				} else {
					reasoningContent = content[:pos]
					content = content[pos+len(endThink):]
				}
				if p.pos == len(p.input) && AllSpace(content) {
					// At end of input: drop a possibly half-received marker
					// from the tail of the reasoning text.
					reasoningContent = rstrip(reasoningContent)
					reasoningContent = trimPotentialPartialWord(reasoningContent, format, startThink, endThink)
					reasoningContent = rstrip(reasoningContent)
					if reasoningContent == "" {
						unclosedReasoningContent = rstrip(unclosedReasoningContent)
						unclosedReasoningContent = trimPotentialPartialWord(unclosedReasoningContent, format, startThink, endThink)
						unclosedReasoningContent = rstrip(unclosedReasoningContent)
						if unclosedReasoningContent == "" {
							continue
						}
					}
				}
				// TODO: Handle reasoning_format and reasoning_in_content from syntax
				// For now, always add to reasoning content.
				p.AddReasoningContent(unclosedReasoningContent)
				p.AddReasoningContent(reasoningContent)
				unclosedReasoningContent = ""
			}
		}

		// Handle multiple think blocks inside the current content.
		toolcallInThink := false
		thinkStart := strings.Index(content, startThink)
		for thinkStart != -1 {
			thinkEnd := strings.Index(content[thinkStart+len(startThink):], endThink)
			if thinkEnd == -1 {
				// Unclosed reasoning block: remember its text and carry it
				// over to the next iteration.
				if format.AllowToolcallInThink {
					unclosedReasoningContent = content[thinkStart+len(startThink):]
				} else {
					unclosedReasoningContent = content[thinkStart+len(startThink):] + toolCallStart
				}
				reasoningUnclosed = true
				content = content[:thinkStart]
				toolcallInThink = true
				break
			}
			// Make thinkEnd an index into content rather than into the tail.
			thinkEnd += thinkStart + len(startThink)
			p.AddReasoningContent(content[thinkStart+len(startThink) : thinkEnd])
			// Erase the reasoning block from content.
			content, _ = eraseSpaces(content, thinkStart, thinkEnd+len(endThink)-1)
			thinkStart = strings.Index(content, startThink)
		}

		// TODO: Handle reasoning_format and reasoning_in_content
		// For now, strip content and remove stray end_think tokens.
		content = rstrip(content)
		for pos := strings.LastIndex(content, endThink); pos != -1; pos = strings.LastIndex(content, endThink) {
			// The position returned by eraseSpaces is not needed: the next
			// occurrence is searched again from scratch.
			content, _ = eraseSpaces(content, pos, pos+len(endThink)-1)
		}
		content = strings.TrimLeftFunc(content, unicode.IsSpace)

		// At end of input, remove a potential partial marker suffix.
		if p.pos == len(p.input) {
			if unclosedReasoningContent == "" {
				content = rstrip(content)
				content = trimPotentialPartialWord(content, format, startThink, endThink)
				content = rstrip(content)
			} else {
				unclosedReasoningContent = rstrip(unclosedReasoningContent)
				unclosedReasoningContent = trimPotentialPartialWord(unclosedReasoningContent, format, startThink, endThink)
				unclosedReasoningContent = rstrip(unclosedReasoningContent)
			}
		}

		// Consume unclosed_reasoning_content if allow_toolcall_in_think is set.
		if format.AllowToolcallInThink && unclosedReasoningContent != "" {
			// TODO: Handle reasoning_format
			p.AddReasoningContent(unclosedReasoningContent)
			unclosedReasoningContent = ""
		}

		if content != "" {
			// TODO: Handle reasoning_format for multiple content blocks
			if p.content.Len() > 0 {
				p.AddContent("\n\n")
			}
			p.AddContent(content)
		}

		// Skip the tool call if it sits in a thinking block and
		// allow_toolcall_in_think is not set.
		if toolcallInThink && !format.AllowToolcallInThink {
			continue
		}

		// No tool call found: nothing left to parse.
		if tc == nil {
			break
		}

		// Parse tool calls starting at the matched marker.
		p.MoveTo(tc.Groups[0].Begin)
		success, err := p.TryConsumeXMLToolCalls(format)
		if err != nil {
			// A partial exception is recoverable; keep going.
			var partial *ChatMsgPartialException
			if errors.As(err, &partial) {
				continue
			}
			return err
		}

		if success {
			endOfTool := p.pos
			p.ConsumeSpaces()
			if p.pos != len(p.input) {
				// More input follows: restore the position and separate
				// upcoming content from what was already collected.
				p.MoveTo(endOfTool)
				if p.content.Len() > 0 {
					p.AddContent("\n\n")
				}
			}
		} else if p.pos < len(p.input) {
			// Tool call parsing failed: emit the next character as content and
			// step over it. Decode a whole rune so that a multi-byte UTF-8
			// character is copied intact instead of byte by byte.
			_, size := utf8.DecodeRuneInString(p.input[p.pos:])
			nextChar := rstrip(p.input[p.pos : p.pos+size])
			p.AddContent(nextChar)
			p.pos += size
		}
	}

	return nil
}
// tryFind2LiteralSplitBySpaces searches for literal1 and then, after
// consuming whitespace, for literal2, and merges both matches into a single
// result.
//
// On success the returned result has one group spanning from the beginning of
// the literal1 match to the end of the literal2 match. Its Prelude is the
// Prelude of the first match, followed by the whitespace-trimmed text between
// the two matches, followed by the Prelude of the second match.
//
// If either literal is not found, the parser position is restored to where it
// was on entry and nil is returned.
//
// NOTE(review): if TryFindLiteral reports the text it skipped as Prelude, any
// non-space text between the two literals would appear twice in the combined
// Prelude (once trimmed, once via the second Prelude) — confirm against
// TryFindLiteral whether that is intended.
func (p *ChatMsgParser) tryFind2LiteralSplitBySpaces(literal1, literal2 string) *FindLiteralResult {
	origin := p.pos

	head := p.TryFindLiteral(literal1)
	var tail *FindLiteralResult
	if head != nil {
		// Only look for the second literal once the first one matched.
		p.ConsumeSpaces()
		tail = p.TryFindLiteral(literal2)
	}
	if head == nil || tail == nil {
		// Nothing usable was found: rewind so the caller sees no movement.
		p.MoveTo(origin)
		return nil
	}

	headSpan, tailSpan := head.Groups[0], tail.Groups[0]
	gap := strings.TrimSpace(p.input[headSpan.End:tailSpan.Begin])

	combined := FindLiteralResult{
		Prelude: head.Prelude + gap + tail.Prelude,
		Groups:  []StringRange{{Begin: headSpan.Begin, End: tailSpan.End}},
	}
	return &combined
}