mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-22 05:11:21 -05:00
* feat(openresponses): support reasoning blocks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * allow to disable reasoning, refactor common logic Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add option to only strip reasoning Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add configurations for custom reasoning tokens Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
259 lines
8.8 KiB
Go
259 lines
8.8 KiB
Go
package reasoning
|
|
|
|
import (
|
|
"strings"
|
|
)
|
|
|
|
// DetectThinkingStartToken checks if the prompt or template contains a thinking start token
|
|
// and returns the detected token. This indicates that the model's prompt template
|
|
// already includes the thinking token, so the model output will start with reasoning
|
|
// content without an explicit opening tag.
|
|
// Returns the detected token if found, empty string otherwise.
|
|
// Common tokens checked (in order of specificity - longer first):
|
|
// Based on llama.cpp's chat-parser.cpp implementations:
|
|
// - <|START_THINKING|> (Command-R models)
|
|
// - <|inner_prefix|> (Apertus models)
|
|
// - <seed:think> (Seed models)
|
|
// - <think> (DeepSeek, Granite, ExaOne models)
|
|
// - <|think|> (Solar Open models)
|
|
// - <thinking> (General thinking tag)
|
|
// - [THINK] (Magistral models)
|
|
// Custom tokens from config are checked first, then default tokens.
|
|
func DetectThinkingStartToken(prompt string, config *Config) string {
|
|
// Common thinking start tokens (in order of specificity - longer first)
|
|
// Based on llama.cpp's chat-parser.cpp implementations
|
|
defaultTokens := []string{
|
|
"<|START_THINKING|>", // Command-R models
|
|
"<|inner_prefix|>", // Apertus models
|
|
"<seed:think>", // Seed models
|
|
"<think>", // DeepSeek, Granite, ExaOne models
|
|
"<|think|>", // Solar Open models
|
|
"<thinking>", // General thinking tag
|
|
"[THINK]", // Magistral models
|
|
}
|
|
|
|
// Merge custom tokens with default tokens (custom tokens first for priority)
|
|
var thinkingStartTokens []string
|
|
if config != nil && len(config.ThinkingStartTokens) > 0 {
|
|
thinkingStartTokens = append(thinkingStartTokens, config.ThinkingStartTokens...)
|
|
}
|
|
thinkingStartTokens = append(thinkingStartTokens, defaultTokens...)
|
|
|
|
// Check if prompt ends with any of these tokens (allowing for trailing whitespace/newlines)
|
|
trimmedPrompt := strings.TrimRight(prompt, " \t\n\r")
|
|
for _, token := range thinkingStartTokens {
|
|
if strings.Contains(trimmedPrompt, token) {
|
|
return token
|
|
}
|
|
}
|
|
|
|
// Also check if any of these tokens appear near the end (within last 100 chars)
|
|
// This handles cases where there might be stop tokens or other content after
|
|
if len(trimmedPrompt) > 100 {
|
|
lastPart := trimmedPrompt[len(trimmedPrompt)-100:]
|
|
for _, token := range thinkingStartTokens {
|
|
if idx := strings.LastIndex(lastPart, token); idx != -1 {
|
|
// Check if this is the last meaningful content (only whitespace after)
|
|
afterToken := lastPart[idx+len(token):]
|
|
if strings.TrimSpace(afterToken) == "" {
|
|
return token
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
// ExtractReasoningWithConfig extracts reasoning from content with the given config.
|
|
// If reasoning is disabled, it returns the original content.
|
|
// If thinking start token prefill is enabled, it prepends the thinking start token to the content.
|
|
// It returns the extracted reasoning and the cleaned content.
|
|
func ExtractReasoningWithConfig(content, thinkingStartToken string, config Config) (reasoning string, cleanedContent string) {
|
|
cleanedContent = content
|
|
// If reasoning is not disabled, prepend the thinking start token if needed and extract reasoning
|
|
if config.DisableReasoning == nil || !*config.DisableReasoning {
|
|
// If thinking start token prefill is not disabled, prepend the thinking start token
|
|
if config.DisableReasoningTagPrefill == nil || !*config.DisableReasoningTagPrefill {
|
|
cleanedContent = PrependThinkingTokenIfNeeded(cleanedContent, thinkingStartToken)
|
|
}
|
|
// Extract reasoning from the cleaned content
|
|
reasoning, cleanedContent = ExtractReasoning(cleanedContent, &config)
|
|
if config.StripReasoningOnly != nil && *config.StripReasoningOnly {
|
|
reasoning = ""
|
|
}
|
|
}
|
|
|
|
return reasoning, cleanedContent
|
|
}
|
|
|
|
// PrependThinkingTokenIfNeeded inserts startToken at the beginning of content
// so that output from models whose prompt already contained the thinking token
// looks like normally tagged content to the standard extraction logic.
//
// The token is placed after any leading whitespace (spaces, tabs, newlines,
// carriage returns), which is preserved. Content is returned unchanged when
// startToken is empty or when content, ignoring that leading whitespace,
// already starts with startToken. A token that only appears later in the
// content does not prevent the prepend.
func PrependThinkingTokenIfNeeded(content string, startToken string) string {
	if startToken == "" {
		return content
	}

	trimmed := strings.TrimLeft(content, " \t\n\r")

	// Only a token at the very start means the block is already opened; a
	// substring check would wrongly skip content that mentions the token later.
	if strings.HasPrefix(trimmed, startToken) {
		return content
	}

	// The leading whitespace is exactly what TrimLeft removed.
	whitespaceEnd := len(content) - len(trimmed)
	return content[:whitespaceEnd] + startToken + trimmed
}
|
|
|
|
// ExtractReasoning extracts reasoning content from thinking tags and returns
|
|
// both the extracted reasoning and the cleaned content (with tags removed).
|
|
// It handles <thinking>...</thinking> and <think>...</think> tags.
|
|
// Multiple reasoning blocks are concatenated with newlines.
|
|
// Custom tag pairs from config are checked first, then default tag pairs.
|
|
func ExtractReasoning(content string, config *Config) (reasoning string, cleanedContent string) {
|
|
if content == "" {
|
|
return "", content
|
|
}
|
|
|
|
var reasoningParts []string
|
|
var cleanedParts []string
|
|
remaining := content
|
|
|
|
// Define default tag pairs to look for (matching llama.cpp's chat-parser.cpp)
|
|
defaultTagPairs := []struct {
|
|
start string
|
|
end string
|
|
}{
|
|
{"<|START_THINKING|>", "<|END_THINKING|>"}, // Command-R models
|
|
{"<|inner_prefix|>", "<|inner_suffix|>"}, // Apertus models
|
|
{"<seed:think>", "</seed:think>"}, // Seed models
|
|
{"<think>", "</think>"}, // DeepSeek, Granite, ExaOne models
|
|
{"<|think|>", "<|end|><|begin|>assistant<|content|>"}, // Solar Open models (complex end)
|
|
{"<thinking>", "</thinking>"}, // General thinking tag
|
|
{"[THINK]", "[/THINK]"}, // Magistral models
|
|
}
|
|
|
|
// Merge custom tag pairs with default tag pairs (custom pairs first for priority)
|
|
var tagPairs []struct {
|
|
start string
|
|
end string
|
|
}
|
|
if config != nil && len(config.TagPairs) > 0 {
|
|
for _, pair := range config.TagPairs {
|
|
if pair.Start != "" && pair.End != "" {
|
|
tagPairs = append(tagPairs, struct {
|
|
start string
|
|
end string
|
|
}{pair.Start, pair.End})
|
|
}
|
|
}
|
|
}
|
|
// Add default tag pairs
|
|
for _, pair := range defaultTagPairs {
|
|
tagPairs = append(tagPairs, pair)
|
|
}
|
|
|
|
// Track the last position we've processed
|
|
lastPos := 0
|
|
|
|
for {
|
|
// Find the earliest tag start
|
|
earliestStart := -1
|
|
earliestEnd := -1
|
|
isUnclosed := false
|
|
var matchedTag struct {
|
|
start string
|
|
end string
|
|
}
|
|
|
|
for _, tagPair := range tagPairs {
|
|
startIdx := strings.Index(remaining[lastPos:], tagPair.start)
|
|
if startIdx == -1 {
|
|
continue
|
|
}
|
|
startIdx += lastPos
|
|
|
|
// Find the corresponding end tag
|
|
endIdx := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end)
|
|
if endIdx == -1 {
|
|
// Unclosed tag - extract what we have
|
|
if earliestStart == -1 || startIdx < earliestStart {
|
|
earliestStart = startIdx
|
|
earliestEnd = len(remaining)
|
|
isUnclosed = true
|
|
matchedTag = tagPair
|
|
}
|
|
continue
|
|
}
|
|
endIdx += startIdx + len(tagPair.start)
|
|
|
|
// Found a complete tag pair
|
|
if earliestStart == -1 || startIdx < earliestStart {
|
|
earliestStart = startIdx
|
|
earliestEnd = endIdx + len(tagPair.end)
|
|
isUnclosed = false
|
|
matchedTag = tagPair
|
|
}
|
|
}
|
|
|
|
if earliestStart == -1 {
|
|
// No more tags found, add remaining content
|
|
if lastPos < len(remaining) {
|
|
cleanedParts = append(cleanedParts, remaining[lastPos:])
|
|
}
|
|
break
|
|
}
|
|
|
|
// Add content before the tag
|
|
if earliestStart > lastPos {
|
|
cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart])
|
|
}
|
|
|
|
// Extract reasoning content
|
|
reasoningStart := earliestStart + len(matchedTag.start)
|
|
// For unclosed tags, earliestEnd is already at the end of the string
|
|
// For closed tags, earliestEnd points to after the closing tag, so we subtract the end tag length
|
|
var reasoningEnd int
|
|
if isUnclosed {
|
|
// Unclosed tag - extract everything to the end
|
|
reasoningEnd = len(remaining)
|
|
} else {
|
|
// Closed tag - exclude the end tag
|
|
reasoningEnd = earliestEnd - len(matchedTag.end)
|
|
}
|
|
if reasoningEnd > reasoningStart {
|
|
reasoningContent := strings.TrimSpace(remaining[reasoningStart:reasoningEnd])
|
|
if reasoningContent != "" {
|
|
reasoningParts = append(reasoningParts, reasoningContent)
|
|
}
|
|
}
|
|
|
|
// Move past this tag
|
|
lastPos = earliestEnd
|
|
}
|
|
|
|
// Combine reasoning parts
|
|
reasoning = strings.Join(reasoningParts, "\n\n")
|
|
// Combine cleaned content parts
|
|
cleanedContent = strings.Join(cleanedParts, "")
|
|
|
|
return reasoning, cleanedContent
|
|
}
|