feat(openresponses): Support reasoning blocks (#8133)

* feat(openresponses): support reasoning blocks

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Allow disabling reasoning; refactor common logic

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add option to only strip reasoning

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add configurations for custom reasoning tokens

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-01-21 00:11:45 +01:00
committed by GitHub
parent 34e054f607
commit c491c6ca90
7 changed files with 1289 additions and 155 deletions

View File

@@ -1,5 +1,15 @@
package reasoning
type Config struct {
DisableReasoningTagPrefill *bool `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"`
// TagPair represents a start/end tag pair for reasoning extraction.
type TagPair struct {
	// Start is the opening tag that begins a reasoning block (e.g. "<think>").
	Start string `yaml:"start" json:"start"`
	// End is the closing tag that terminates the reasoning block (e.g. "</think>").
	End string `yaml:"end" json:"end"`
}
// Config controls how reasoning ("thinking") blocks are detected and
// extracted from model output. All fields are optional; nil pointers and
// empty slices fall back to the package defaults.
type Config struct {
	// DisableReasoningTagPrefill, when true, prevents the detected thinking
	// start token from being prepended to the content before extraction.
	DisableReasoningTagPrefill *bool `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"`
	// DisableReasoning, when true, skips reasoning extraction entirely and
	// leaves the content untouched.
	DisableReasoning *bool `yaml:"disable,omitempty" json:"disable,omitempty"`
	// StripReasoningOnly, when true, removes reasoning blocks from the
	// content but discards the extracted reasoning instead of returning it.
	StripReasoningOnly *bool `yaml:"strip_reasoning_only,omitempty" json:"strip_reasoning_only,omitempty"`
	// ThinkingStartTokens lists custom start tokens checked before the
	// built-in defaults when detecting a thinking prefix in the prompt.
	ThinkingStartTokens []string `yaml:"thinking_start_tokens,omitempty" json:"thinking_start_tokens,omitempty"`
	// TagPairs lists custom start/end tag pairs checked before the built-in
	// defaults when extracting reasoning blocks from content.
	TagPairs []TagPair `yaml:"tag_pairs,omitempty" json:"tag_pairs,omitempty"`
}

View File

@@ -17,12 +17,12 @@ import (
// - <think> (DeepSeek, Granite, ExaOne models)
// - <|think|> (Solar Open models)
// - <thinking> (General thinking tag)
// - <think> (GLM models)
// - [THINK] (Magistral models)
func DetectThinkingStartToken(prompt string) string {
// Custom tokens from config are checked first, then default tokens.
func DetectThinkingStartToken(prompt string, config *Config) string {
// Common thinking start tokens (in order of specificity - longer first)
// Based on llama.cpp's chat-parser.cpp implementations
thinkingStartTokens := []string{
defaultTokens := []string{
"<|START_THINKING|>", // Command-R models
"<|inner_prefix|>", // Apertus models
"<seed:think>", // Seed models
@@ -32,6 +32,13 @@ func DetectThinkingStartToken(prompt string) string {
"[THINK]", // Magistral models
}
// Merge custom tokens with default tokens (custom tokens first for priority)
var thinkingStartTokens []string
if config != nil && len(config.ThinkingStartTokens) > 0 {
thinkingStartTokens = append(thinkingStartTokens, config.ThinkingStartTokens...)
}
thinkingStartTokens = append(thinkingStartTokens, defaultTokens...)
// Check if prompt ends with any of these tokens (allowing for trailing whitespace/newlines)
trimmedPrompt := strings.TrimRight(prompt, " \t\n\r")
for _, token := range thinkingStartTokens {
@@ -58,6 +65,28 @@ func DetectThinkingStartToken(prompt string) string {
return ""
}
// ExtractReasoningWithConfig extracts reasoning from content according to
// the given config.
//
// Behavior:
//   - If reasoning is disabled, the original content is returned untouched
//     with empty reasoning.
//   - Unless tag prefill is disabled, the detected thinking start token is
//     prepended to the content so the standard extraction logic applies.
//   - If StripReasoningOnly is set, reasoning blocks are removed from the
//     content but the extracted reasoning itself is discarded.
//
// It returns the extracted reasoning and the cleaned content.
func ExtractReasoningWithConfig(content, thinkingStartToken string, config Config) (reasoning string, cleanedContent string) {
	// Reasoning disabled: pass the content through unchanged.
	if config.DisableReasoning != nil && *config.DisableReasoning {
		return "", content
	}

	cleanedContent = content

	// Prepend the thinking start token unless prefill is explicitly disabled.
	if config.DisableReasoningTagPrefill == nil || !*config.DisableReasoningTagPrefill {
		cleanedContent = PrependThinkingTokenIfNeeded(cleanedContent, thinkingStartToken)
	}

	reasoning, cleanedContent = ExtractReasoning(cleanedContent, &config)

	// Strip-only mode: keep the cleaned content but drop the reasoning text.
	if config.StripReasoningOnly != nil && *config.StripReasoningOnly {
		reasoning = ""
	}

	return reasoning, cleanedContent
}
// PrependThinkingTokenIfNeeded prepends the thinking start token to content if it was
// detected in the prompt. This allows the standard extraction logic to work correctly
// for models where the thinking token is already in the prompt.
@@ -97,7 +126,8 @@ func PrependThinkingTokenIfNeeded(content string, startToken string) string {
// both the extracted reasoning and the cleaned content (with tags removed).
// It handles <thinking>...</thinking> and <think>...</think> tags.
// Multiple reasoning blocks are concatenated with newlines.
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
// Custom tag pairs from config are checked first, then default tag pairs.
func ExtractReasoning(content string, config *Config) (reasoning string, cleanedContent string) {
if content == "" {
return "", content
}
@@ -106,8 +136,8 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
var cleanedParts []string
remaining := content
// Define tag pairs to look for (matching llama.cpp's chat-parser.cpp)
tagPairs := []struct {
// Define default tag pairs to look for (matching llama.cpp's chat-parser.cpp)
defaultTagPairs := []struct {
start string
end string
}{
@@ -120,6 +150,26 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
{"[THINK]", "[/THINK]"}, // Magistral models
}
// Merge custom tag pairs with default tag pairs (custom pairs first for priority)
var tagPairs []struct {
start string
end string
}
if config != nil && len(config.TagPairs) > 0 {
for _, pair := range config.TagPairs {
if pair.Start != "" && pair.End != "" {
tagPairs = append(tagPairs, struct {
start string
end string
}{pair.Start, pair.End})
}
}
}
// Add default tag pairs
for _, pair := range defaultTagPairs {
tagPairs = append(tagPairs, pair)
}
// Track the last position we've processed
lastPos := 0

View File

File diff suppressed because it is too large Load Diff