Compare commits

...

3 Commits

Author SHA1 Message Date
Ettore Di Giacinto
61a6e95f7d Additional thinking tags
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-20 12:02:35 +01:00
Ettore Di Giacinto
a352125726 chore: refactorings
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-20 11:48:00 +01:00
Ettore Di Giacinto
187e474daf fix(reasoning): handle only closing tags
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-20 11:40:29 +01:00
9 changed files with 833 additions and 389 deletions

View File

@@ -10,6 +10,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/reasoning"
"github.com/mudler/cogito"
"gopkg.in/yaml.v3"
)
@@ -51,6 +52,7 @@ type ModelConfig struct {
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)

View File

@@ -13,6 +13,7 @@ import (
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/reasoning"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model"
@@ -43,10 +44,19 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
lastEmittedReasoning := ""
lastEmittedCleanedContent := ""
// Configure reasoning extraction options
// Auto-detect if prompt ends with thinking tag
// or use explicit config setting
thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s)
_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
accumulatedContent += s
// Extract reasoning from accumulated content
currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
opts := []reasoning.Option{}
if thinkingForcedOpen {
opts = append(opts, reasoning.WithThinkingForcedOpen())
}
currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...)
// Calculate new reasoning delta (what we haven't emitted yet)
var reasoningDelta *string
@@ -230,7 +240,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
return err
}
// Extract reasoning before processing tool calls
reasoning, cleanedResult := functions.ExtractReasoning(result)
// Auto-detect if prompt ends with thinking tag or use explicit config
toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt)
opts := []reasoning.Option{}
if toolsThinkingForcedOpen {
opts = append(opts, reasoning.WithThinkingForcedOpen())
}
extractedReasoning, cleanedResult := reasoning.Extract(result, opts...)
result = cleanedResult
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
@@ -266,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
var deltaReasoning *string
if reasoning != "" {
deltaReasoning = &reasoning
if extractedReasoning != "" {
deltaReasoning = &extractedReasoning
}
delta := &schema.Message{Content: &result}
if deltaReasoning != nil {
@@ -618,17 +634,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
// no streaming mode
default:
// Auto-detect if prompt ends with thinking tag for non-streaming mode
nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput)
tokenCallback := func(s string, c *[]schema.Choice) {
// Extract reasoning from the response
reasoning, cleanedS := functions.ExtractReasoning(s)
s = cleanedS
var extractedReasoning string
opts := []reasoning.Option{}
if nonStreamThinkingForcedOpen {
opts = append(opts, reasoning.WithThinkingForcedOpen())
}
extractedReasoning, s = reasoning.Extract(s, opts...)
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
stopReason := FinishReasonStop
message := &schema.Message{Role: "assistant", Content: &s}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message})
return
@@ -650,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
stopReason := FinishReasonStop
message := &schema.Message{Role: "assistant", Content: &result}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{
FinishReason: &stopReason,
@@ -664,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
Role: "assistant",
},
}
if reasoning != "" {
toolChoice.Message.Reasoning = &reasoning
if extractedReasoning != "" {
toolChoice.Message.Reasoning = &extractedReasoning
}
for _, ss := range results {
@@ -695,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
"arguments": args,
},
}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{
FinishReason: &functionCallReason,

View File

@@ -1,114 +0,0 @@
package functions
import (
"strings"
)
// ExtractReasoning splits model output into its reasoning part and its
// user-visible part.
//
// Text enclosed in <thinking>...</thinking> or <think>...</think> is collected
// as reasoning (each block whitespace-trimmed, empty blocks dropped, blocks
// joined with a blank line). Everything outside those blocks is concatenated
// unchanged and returned as cleanedContent. An opening tag without a matching
// closing tag captures everything up to the end of the input as reasoning.
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
	if len(content) == 0 {
		return "", content
	}

	type tagPair struct {
		opener string
		closer string
	}
	pairs := [...]tagPair{
		{opener: "<thinking>", closer: "</thinking>"},
		{opener: "<think>", closer: "</think>"},
	}

	var thoughts []string
	var visible []string

	// cursor is the offset of the first byte that has not been consumed yet.
	cursor := 0
	for {
		// Pick the block whose opening tag appears first at or after cursor.
		blockStart := -1
		bodyStart, bodyEnd, blockEnd := 0, 0, 0
		for _, p := range pairs {
			rel := strings.Index(content[cursor:], p.opener)
			if rel < 0 {
				continue
			}
			openAt := cursor + rel
			if blockStart >= 0 && openAt >= blockStart {
				// An earlier opening tag has already been selected.
				continue
			}

			blockStart = openAt
			bodyStart = openAt + len(p.opener)
			if closeRel := strings.Index(content[bodyStart:], p.closer); closeRel >= 0 {
				bodyEnd = bodyStart + closeRel
				blockEnd = bodyEnd + len(p.closer)
			} else {
				// Unclosed block: the reasoning runs to the end of the input.
				bodyEnd = len(content)
				blockEnd = len(content)
			}
		}

		if blockStart < 0 {
			// No further tags: whatever is left is plain content.
			if cursor < len(content) {
				visible = append(visible, content[cursor:])
			}
			break
		}

		// Plain content that precedes the selected block.
		if blockStart > cursor {
			visible = append(visible, content[cursor:blockStart])
		}

		if bodyEnd > bodyStart {
			if thought := strings.TrimSpace(content[bodyStart:bodyEnd]); thought != "" {
				thoughts = append(thoughts, thought)
			}
		}

		cursor = blockEnd
	}

	return strings.Join(thoughts, "\n\n"), strings.Join(visible, "")
}

View File

@@ -1,261 +0,0 @@
package functions_test
import (
"strings"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for ExtractReasoning. Each spec feeds one input string to the function
// and checks both return values: the extracted reasoning and the content that
// remains once the reasoning blocks have been removed.
var _ = Describe("ExtractReasoning", func() {
// Inputs without any tag must come back untouched, with no reasoning.
Context("when content has no reasoning tags", func() {
It("should return empty reasoning and original content", func() {
content := "This is regular content without any tags."
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(content))
})
It("should handle empty string", func() {
content := ""
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(BeEmpty())
})
It("should handle content with only whitespace", func() {
content := " \n\t "
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(content))
})
})
// A closed <thinking> block in various positions of the input.
Context("when content has <thinking> tags", func() {
It("should extract reasoning from single thinking block", func() {
content := "Some text <thinking>This is my reasoning</thinking> More text"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("This is my reasoning"))
Expect(cleaned).To(Equal("Some text More text"))
})
It("should extract reasoning and preserve surrounding content", func() {
content := "Before <thinking>Reasoning here</thinking> After"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning here"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle thinking block at the start", func() {
content := "<thinking>Start reasoning</thinking> Regular content"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Start reasoning"))
Expect(cleaned).To(Equal(" Regular content"))
})
It("should handle thinking block at the end", func() {
content := "Regular content <thinking>End reasoning</thinking>"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("End reasoning"))
Expect(cleaned).To(Equal("Regular content "))
})
It("should handle only thinking block", func() {
content := "<thinking>Only reasoning</thinking>"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Only reasoning"))
Expect(cleaned).To(BeEmpty())
})
// The extracted reasoning is expected to be whitespace-trimmed.
It("should trim whitespace from reasoning content", func() {
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning with spaces"))
Expect(cleaned).To(Equal("Text More"))
})
})
// Same checks for the shorter <think> tag.
// NOTE(review): the spec names below say "redacted_reasoning" while the inputs
// use <think> tags - confirm whether the names are stale.
Context("when content has <think> tags", func() {
It("should extract reasoning from redacted_reasoning block", func() {
content := "Text <think>Redacted reasoning</think> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Redacted reasoning"))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle redacted_reasoning with multiline content", func() {
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle redacted_reasoning with complex content", func() {
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
Expect(cleaned).To(Equal("Start End"))
})
})
// Several blocks in one input: reasoning parts are joined with "\n\n".
Context("when content has multiple reasoning blocks", func() {
It("should concatenate multiple thinking blocks with newlines", func() {
content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("First\n\nSecond"))
Expect(cleaned).To(Equal("Text Middle End"))
})
It("should handle multiple different tag types", func() {
content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(ContainSubstring("One"))
Expect(reasoning).To(ContainSubstring("Two"))
Expect(reasoning).To(ContainSubstring("Three"))
Expect(cleaned).To(Equal("A B C D"))
})
It("should handle nested tags correctly (extracts first match)", func() {
content := "Text <thinking>Outer <think>Inner</think></thinking> More"
reasoning, cleaned := ExtractReasoning(content)
// Should extract the outer thinking block
Expect(reasoning).To(ContainSubstring("Outer"))
Expect(reasoning).To(ContainSubstring("Inner"))
Expect(cleaned).To(Equal("Text More"))
})
})
// An opening tag that is never closed captures the rest of the input.
Context("when content has unclosed reasoning tags", func() {
It("should extract unclosed thinking block", func() {
content := "Text <thinking>Unclosed reasoning"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Unclosed reasoning"))
Expect(cleaned).To(Equal("Text "))
})
It("should extract unclosed think block", func() {
content := "Before <think>Incomplete"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Incomplete"))
Expect(cleaned).To(Equal("Before "))
})
// NOTE(review): this input also uses <think>, so it exercises the same path
// as the previous spec - confirm whether a different tag was intended.
It("should extract unclosed redacted_reasoning block", func() {
content := "Start <think>Partial reasoning content"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Partial reasoning content"))
Expect(cleaned).To(Equal("Start "))
})
It("should handle unclosed tag at the end", func() {
content := "Regular content <thinking>Unclosed at end"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Unclosed at end"))
Expect(cleaned).To(Equal("Regular content "))
})
})
// Blocks that are empty or whitespace-only contribute no reasoning, but the
// tags themselves are still removed from the content.
Context("when content has empty reasoning blocks", func() {
It("should ignore empty thinking block", func() {
content := "Text <thinking></thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("Text More"))
})
It("should ignore thinking block with only whitespace", func() {
content := "Text <thinking> \n\t </thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("Text More"))
})
})
// The reasoning body is passed through verbatim, whatever characters it holds.
Context("when content has reasoning tags with special characters", func() {
It("should handle reasoning with newlines", func() {
content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle reasoning with code blocks", func() {
content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle reasoning with JSON", func() {
content := "Before <think>{\"key\": \"value\"}</think> After"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle reasoning with HTML-like content", func() {
content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
Expect(cleaned).To(Equal("Text More"))
})
})
// Reasoning interleaved with regular text: the regular text keeps its order.
Context("when content has reasoning mixed with regular content", func() {
It("should preserve content order correctly", func() {
content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(ContainSubstring("Reasoning"))
Expect(reasoning).To(ContainSubstring("More reasoning"))
Expect(cleaned).To(Equal("Start Middle End"))
})
It("should handle reasoning in the middle of a sentence", func() {
content := "This is a <thinking>reasoning</thinking> sentence."
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("reasoning"))
Expect(cleaned).To(Equal("This is a sentence."))
})
})
// Degenerate inputs: lone tags, mismatched tags, long and non-ASCII bodies.
Context("edge cases", func() {
It("should handle content with only opening tag", func() {
content := "<thinking>"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(""))
})
// A closing tag with no opening tag is expected to stay in the content.
It("should handle content with only closing tag", func() {
content := "</thinking>"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("</thinking>"))
})
It("should handle mismatched tags", func() {
content := "<thinking>Content</think>"
reasoning, cleaned := ExtractReasoning(content)
// Should extract unclosed thinking block
Expect(reasoning).To(ContainSubstring("Content"))
Expect(cleaned).To(Equal(""))
})
It("should handle very long reasoning content", func() {
longReasoning := strings.Repeat("This is reasoning content. ", 100)
content := "Text <thinking>" + longReasoning + "</thinking> More"
reasoning, cleaned := ExtractReasoning(content)
// TrimSpace is applied, so we need to account for that
Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle reasoning with unicode characters", func() {
content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
Expect(cleaned).To(Equal("Text More"))
})
})
})

8
pkg/reasoning/config.go Normal file
View File

@@ -0,0 +1,8 @@
package reasoning
// ReasoningConfig holds the settings that control how reasoning ("thinking")
// content is extracted from model output. It is (de)serialised from YAML and
// JSON through the struct tags below.
type ReasoningConfig struct {
// ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag.
// When true, all content from the start is treated as reasoning until a closing tag is found.
// This is useful for models like GLM-4 that output reasoning without <think> but end with </think>.
// The zero value (false) is omitted when marshalling because of omitempty.
ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"`
}

18
pkg/reasoning/options.go Normal file
View File

@@ -0,0 +1,18 @@
package reasoning
// options holds the configuration for reasoning extraction.
// It is unexported: values are set through Option functions such as
// WithThinkingForcedOpen.
type options struct {
// thinkingForcedOpen, when true, means the content is treated as reasoning
// from its first byte until a closing tag is found.
thinkingForcedOpen bool
}
// Option is a functional option for configuring reasoning extraction.
// Each Option receives a pointer to the options value and mutates it in place.
type Option func(*options)
// WithThinkingForcedOpen returns an Option that switches extraction into
// "forced open" mode: the content is considered reasoning from its very first
// byte, up to the first closing tag. Use it for models (GLM-4 is one example)
// whose output omits the opening <think> tag but still ends the reasoning with
// </think>.
func WithThinkingForcedOpen() Option {
	enable := func(cfg *options) {
		cfg.thinkingForcedOpen = true
	}
	return enable
}

256
pkg/reasoning/reasoning.go Normal file
View File

@@ -0,0 +1,256 @@
package reasoning
import (
"strings"
)
// thinkingOpenTags lists common thinking/reasoning opening tags used by various models.
// These match the tags detected by llama.cpp in common/chat.cpp
// (except where noted otherwise below).
//
// DetectThinkingForcedOpen checks whether a prompt ends with one of these
// entries, so a tag listed with a trailing "\n" also matches prompts that end
// with that newline after the tag.
var thinkingOpenTags = []string{
// DeepSeek R1, V3.1, Nemotron V2, MiniMax M2, Hermes 2 Pro, Granite, Exaone MOE
"<think>\n",
"<think>",
// Generic thinking tags
"<thinking>\n",
"<thinking>",
// Apertus
"<|inner_prefix|>",
// Command R7B
"<|START_THINKING|>",
// Seed
"<seed:think>",
// Magistral (not in llama.cpp but common)
"[THINK]\n",
"[THINK]",
}
// DetectThinkingForcedOpen reports whether prompt ends with one of the known
// thinking opening tags listed in thinkingOpenTags.
//
// A prompt that already ends with such a tag means the template opened the
// reasoning block itself, so the model output starts directly with reasoning
// text. The match is an exact, case-sensitive suffix comparison; a tag that
// only appears earlier in the prompt does not count.
func DetectThinkingForcedOpen(prompt string) bool {
	detected := false
	// Stop scanning as soon as one tag matches.
	for i := 0; i < len(thinkingOpenTags) && !detected; i++ {
		detected = strings.HasSuffix(prompt, thinkingOpenTags[i])
	}
	return detected
}
// Extract separates reasoning from the user-visible part of content.
//
// It returns the extracted reasoning and the cleaned content with the
// reasoning blocks and their tags removed. Empty content is returned as is,
// with empty reasoning.
//
// By default the content is scanned for tag-delimited blocks such as
// <thinking>...</thinking> and <think>...</think>; multiple blocks are joined
// with a blank line, and a closing tag without an opening tag marks everything
// before it as reasoning.
//
// Pass WithThinkingForcedOpen() when the content should be treated as
// reasoning from its first byte until a closing tag is found.
func Extract(content string, opts ...Option) (reasoning string, cleanedContent string) {
	if len(content) == 0 {
		return "", content
	}

	// Fold every option into a single settings value.
	var settings options
	for i := range opts {
		opts[i](&settings)
	}

	if !settings.thinkingForcedOpen {
		return extractFromTags(content)
	}
	return extractForcedOpen(content)
}
// extractForcedOpen handles the case where reasoning starts without an opening tag.
// All content from the start is treated as reasoning until a closing tag is found.
//
// If the content nevertheless begins with a known opening tag (optionally
// preceded by whitespace), that single tag is dropped first, so the literal tag
// does not end up inside the returned reasoning.
//
// Returns the whitespace-trimmed reasoning and the content that follows the
// first closing tag. That remainder is passed through extractFromTags, so any
// further reasoning blocks in it are appended to the reasoning (separated by a
// blank line) and removed from the cleaned content. When no closing tag is
// present, the whole input is reasoning and the cleaned content is empty.
func extractForcedOpen(content string) (reasoning string, cleanedContent string) {
	// Opening tags that may be repeated at the start of the output.
	// Kept local so this function is self-contained; mirrors closingTags below.
	openingTags := []string{
		"<thinking>",
		"<think>",
		"<|START_THINKING|>", // Command R7B
		"<|inner_prefix|>",   // Apertus
		"<seed:think>",       // Seed
		"[THINK]",            // Magistral
	}
	// Leading whitespace is trimmed from the reasoning anyway, so it is safe to
	// look past it when checking for a repeated opening tag.
	leading := strings.TrimLeft(content, " \t\r\n")
	for _, openTag := range openingTags {
		if strings.HasPrefix(leading, openTag) {
			content = leading[len(openTag):]
			break
		}
	}

	// Look for the earliest closing tag
	// These match the closing tags used by llama.cpp for various models
	closingTags := []string{
		"</thinking>",
		"</think>",
		"<|END_THINKING|>", // Command R7B
		"<|inner_suffix|>", // Apertus
		"</seed:think>",    // Seed
		"[/THINK]",         // Magistral
	}

	earliestCloseIdx := -1
	var matchedCloseTag string
	for _, closeTag := range closingTags {
		idx := strings.Index(content, closeTag)
		if idx != -1 && (earliestCloseIdx == -1 || idx < earliestCloseIdx) {
			earliestCloseIdx = idx
			matchedCloseTag = closeTag
		}
	}

	if earliestCloseIdx == -1 {
		// No closing tag found - all content is reasoning (still streaming)
		return strings.TrimSpace(content), ""
	}

	// Found closing tag - everything before is reasoning, everything after is content
	reasoning = strings.TrimSpace(content[:earliestCloseIdx])
	cleanedContent = content[earliestCloseIdx+len(matchedCloseTag):]
	if cleanedContent == "" {
		return reasoning, cleanedContent
	}

	// Continue processing the rest for any additional reasoning blocks
	additionalReasoning, finalContent := extractFromTags(cleanedContent)
	switch {
	case additionalReasoning == "":
		// Nothing to add.
	case reasoning == "":
		reasoning = additionalReasoning
	default:
		reasoning = reasoning + "\n\n" + additionalReasoning
	}
	return reasoning, finalContent
}
// extractFromTags extracts reasoning content from thinking tags.
// This is the core implementation that handles standard tag-based extraction.
//
// The input is scanned left to right. At each step the tag that appears first
// (across all supported tag families) decides what happens:
//   - an opening tag with a matching closing tag: the text between them is
//     reasoning, the text before the opening tag is regular content;
//   - an opening tag without a closing tag: everything after it is reasoning;
//   - a closing tag with no opening tag before it: everything between the
//     current position and that closing tag is reasoning.
//
// Candidates are compared by the position of their first tag, so the result
// does not depend on the order in which the tag families are listed.
//
// Returns the reasoning blocks (each whitespace-trimmed, empty ones dropped,
// joined with a blank line) and the remaining content concatenated unchanged.
func extractFromTags(content string) (reasoning string, cleanedContent string) {
	if content == "" {
		return "", content
	}

	// Tag pairs to look for.
	// These match the tags used by llama.cpp for various models
	type tagPair struct {
		start string
		end   string
	}
	tagPairs := []tagPair{
		{"<thinking>", "</thinking>"},
		{"<think>", "</think>"},
		{"<|START_THINKING|>", "<|END_THINKING|>"}, // Command R7B
		{"<|inner_prefix|>", "<|inner_suffix|>"},   // Apertus
		{"<seed:think>", "</seed:think>"},          // Seed
		{"[THINK]", "[/THINK]"},                    // Magistral
	}

	var reasoningParts []string
	var cleanedParts []string

	// lastPos is the offset of the first byte that has not been processed yet.
	lastPos := 0

	for {
		// markerPos is the absolute position of the tag that triggered the
		// currently selected candidate (-1 while nothing is selected).
		markerPos := -1
		// [blockStart, blockEnd) is the span consumed by the candidate,
		// [bodyStart, bodyEnd) is the reasoning text inside it.
		blockStart, blockEnd := 0, 0
		bodyStart, bodyEnd := 0, 0

		for _, pair := range tagPairs {
			rest := content[lastPos:]
			startRel := strings.Index(rest, pair.start)
			endRel := strings.Index(rest, pair.end)

			// Closing tag appears before (or without) its opening tag:
			// everything from lastPos up to the closing tag is reasoning.
			if endRel != -1 && (startRel == -1 || endRel < startRel) {
				closePos := lastPos + endRel
				if markerPos == -1 || closePos < markerPos {
					markerPos = closePos
					blockStart, blockEnd = lastPos, closePos+len(pair.end)
					bodyStart, bodyEnd = lastPos, closePos
				}
				continue
			}

			if startRel == -1 {
				continue
			}

			startPos := lastPos + startRel
			if markerPos != -1 && startPos >= markerPos {
				// A tag that appears earlier in the text is already selected.
				continue
			}

			markerPos = startPos
			blockStart = startPos
			bodyStart = startPos + len(pair.start)
			if closeRel := strings.Index(content[bodyStart:], pair.end); closeRel != -1 {
				// Complete tag pair.
				bodyEnd = bodyStart + closeRel
				blockEnd = bodyEnd + len(pair.end)
			} else {
				// Unclosed tag - the reasoning runs to the end of the input.
				bodyEnd = len(content)
				blockEnd = len(content)
			}
		}

		if markerPos == -1 {
			// No more tags found, add remaining content
			if lastPos < len(content) {
				cleanedParts = append(cleanedParts, content[lastPos:])
			}
			break
		}

		// Regular content before the opening tag. For a closing-only match
		// blockStart equals lastPos, so nothing is added here.
		if blockStart > lastPos {
			cleanedParts = append(cleanedParts, content[lastPos:blockStart])
		}

		if bodyEnd > bodyStart {
			if part := strings.TrimSpace(content[bodyStart:bodyEnd]); part != "" {
				reasoningParts = append(reasoningParts, part)
			}
		}

		// Move past this block
		lastPos = blockEnd
	}

	return strings.Join(reasoningParts, "\n\n"), strings.Join(cleanedParts, "")
}

View File

@@ -0,0 +1,13 @@
package reasoning_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// TestReasoning is the go test entry point for the Ginkgo specs of this
// package: it registers Ginkgo's Fail as the Gomega fail handler and then runs
// the suite under the name "Reasoning Suite".
func TestReasoning(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Reasoning Suite")
}

View File

@@ -0,0 +1,499 @@
package reasoning_test
import (
"strings"
. "github.com/mudler/LocalAI/pkg/reasoning"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for DetectThinkingForcedOpen: a prompt counts as "forced open" only
// when it ends with a known thinking opening tag (optionally followed by the
// newline variants that are listed explicitly).
var _ = Describe("DetectThinkingForcedOpen", func() {
	It("should detect <think> at end of prompt", func() {
		for _, prompt := range []string{
			"Some prompt<think>",
			"Some prompt<think>\n",
		} {
			Expect(DetectThinkingForcedOpen(prompt)).To(BeTrue())
		}
	})

	It("should detect <thinking> at end of prompt", func() {
		for _, prompt := range []string{
			"Some prompt<thinking>",
			"Some prompt<thinking>\n",
		} {
			Expect(DetectThinkingForcedOpen(prompt)).To(BeTrue())
		}
	})

	It("should detect model-specific tags", func() {
		for _, prompt := range []string{
			"Some prompt<|inner_prefix|>",
			"Some prompt<|START_THINKING|>",
			"Some prompt<seed:think>",
			"Some prompt[THINK]",
			"Some prompt[THINK]\n",
		} {
			Expect(DetectThinkingForcedOpen(prompt)).To(BeTrue())
		}
	})

	// A tag that is followed by more text is not a forced-open prompt.
	It("should not detect if tag is in the middle", func() {
		for _, prompt := range []string{
			"Some <think> prompt",
			"<think>reasoning</think>",
		} {
			Expect(DetectThinkingForcedOpen(prompt)).To(BeFalse())
		}
	})

	It("should not detect if no thinking tag", func() {
		for _, prompt := range []string{
			"Some regular prompt",
			"",
		} {
			Expect(DetectThinkingForcedOpen(prompt)).To(BeFalse())
		}
	})
})
var _ = Describe("Extract", func() {
Context("when content has no reasoning tags", func() {
It("should return empty reasoning and original content", func() {
content := "This is regular content without any tags."
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(content))
})
It("should handle empty string", func() {
content := ""
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(BeEmpty())
})
It("should handle content with only whitespace", func() {
content := " \n\t "
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(content))
})
})
Context("when content has <thinking> tags", func() {
It("should extract reasoning from single thinking block", func() {
content := "Some text <thinking>This is my reasoning</thinking> More text"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("This is my reasoning"))
Expect(cleaned).To(Equal("Some text More text"))
})
It("should extract reasoning and preserve surrounding content", func() {
content := "Before <thinking>Reasoning here</thinking> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning here"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle thinking block at the start", func() {
content := "<thinking>Start reasoning</thinking> Regular content"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Start reasoning"))
Expect(cleaned).To(Equal(" Regular content"))
})
It("should handle thinking block at the end", func() {
content := "Regular content <thinking>End reasoning</thinking>"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("End reasoning"))
Expect(cleaned).To(Equal("Regular content "))
})
It("should handle only thinking block", func() {
content := "<thinking>Only reasoning</thinking>"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Only reasoning"))
Expect(cleaned).To(BeEmpty())
})
It("should trim whitespace from reasoning content", func() {
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning with spaces"))
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has <think> tags", func() {
It("should extract reasoning from redacted_reasoning block", func() {
content := "Text <think>Redacted reasoning</think> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Redacted reasoning"))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle redacted_reasoning with multiline content", func() {
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle redacted_reasoning with complex content", func() {
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
Expect(cleaned).To(Equal("Start End"))
})
})
// Specs for inputs carrying more than one reasoning block, including mixed
// tag names and one block nested inside another.
Context("when content has multiple reasoning blocks", func() {
	It("should concatenate multiple thinking blocks with newlines", func() {
		// Two blocks are expected to be joined by a blank line.
		input := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("First\n\nSecond"))
		Expect(gotCleaned).To(Equal("Text Middle End"))
	})
	It("should handle multiple different tag types", func() {
		input := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
		gotReasoning, gotCleaned := Extract(input)
		// Only presence of each body is asserted, not the exact joined form.
		Expect(gotReasoning).To(ContainSubstring("One"))
		Expect(gotReasoning).To(ContainSubstring("Two"))
		Expect(gotReasoning).To(ContainSubstring("Three"))
		Expect(gotCleaned).To(Equal("A B C D"))
	})
	It("should handle nested tags correctly (extracts first match)", func() {
		input := "Text <thinking>Outer <think>Inner</think></thinking> More"
		gotReasoning, gotCleaned := Extract(input)
		// The outer <thinking> block is the one expected to be extracted, so
		// both the outer and the inner text must show up in the reasoning.
		Expect(gotReasoning).To(ContainSubstring("Outer"))
		Expect(gotReasoning).To(ContainSubstring("Inner"))
		Expect(gotCleaned).To(Equal("Text More"))
	})
})
// Specs for an opening tag that is never closed: everything after the opening
// tag is expected as reasoning, and the text before it (including its trailing
// space) as cleaned content.
Context("when content has unclosed reasoning tags", func() {
	It("should extract unclosed thinking block", func() {
		content := "Text <thinking>Unclosed reasoning"
		reasoning, cleaned := Extract(content)
		Expect(reasoning).To(Equal("Unclosed reasoning"))
		Expect(cleaned).To(Equal("Text "))
	})
	It("should extract unclosed think block", func() {
		content := "Before <think>Incomplete"
		reasoning, cleaned := Extract(content)
		Expect(reasoning).To(Equal("Incomplete"))
		Expect(cleaned).To(Equal("Before "))
	})
	// NOTE(review): this input uses <think>, not a redacted_reasoning tag; if
	// such a tag should be supported it needs its own spec — confirm.
	It("should extract unclosed think block with multi-word content", func() {
		content := "Start <think>Partial reasoning content"
		reasoning, cleaned := Extract(content)
		Expect(reasoning).To(Equal("Partial reasoning content"))
		Expect(cleaned).To(Equal("Start "))
	})
	It("should handle unclosed tag at the end", func() {
		content := "Regular content <thinking>Unclosed at end"
		reasoning, cleaned := Extract(content)
		Expect(reasoning).To(Equal("Unclosed at end"))
		Expect(cleaned).To(Equal("Regular content "))
	})
})
// Specs for blocks whose body is empty or whitespace-only: no reasoning is
// expected, while the tags are still removed from the cleaned text.
Context("when content has empty reasoning blocks", func() {
	It("should ignore empty thinking block", func() {
		input := "Text <thinking></thinking> More"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(BeEmpty())
		Expect(gotCleaned).To(Equal("Text More"))
	})
	It("should ignore thinking block with only whitespace", func() {
		// Body consists solely of space, newline and tab characters.
		input := "Text <thinking> \n\t </thinking> More"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(BeEmpty())
		Expect(gotCleaned).To(Equal("Text More"))
	})
})
// Specs checking that reasoning bodies containing newlines, code fences, JSON
// or angle-bracketed text are returned unchanged.
Context("when content has reasoning tags with special characters", func() {
	It("should handle reasoning with newlines", func() {
		input := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Line 1\nLine 2\nLine 3"))
		Expect(gotCleaned).To(Equal("Before After"))
	})
	It("should handle reasoning with code blocks", func() {
		input := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Reasoning with ```code``` blocks"))
		Expect(gotCleaned).To(Equal("Text More"))
	})
	It("should handle reasoning with JSON", func() {
		input := "Before <think>{\"key\": \"value\"}</think> After"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("{\"key\": \"value\"}"))
		Expect(gotCleaned).To(Equal("Before After"))
	})
	It("should handle reasoning with HTML-like content", func() {
		// "<tags>" is not a reasoning tag and is expected to stay in the body.
		input := "Text <thinking>Reasoning with <tags> inside</thinking> More"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Reasoning with <tags> inside"))
		Expect(gotCleaned).To(Equal("Text More"))
	})
})
// Specs for reasoning blocks interleaved with ordinary text.
Context("when content has reasoning mixed with regular content", func() {
	It("should preserve content order correctly", func() {
		input := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(ContainSubstring("Reasoning"))
		Expect(gotReasoning).To(ContainSubstring("More reasoning"))
		// The non-reasoning words must keep their original relative order.
		Expect(gotCleaned).To(Equal("Start Middle End"))
	})
	It("should handle reasoning in the middle of a sentence", func() {
		input := "This is a <thinking>reasoning</thinking> sentence."
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning"))
		Expect(gotCleaned).To(Equal("This is a sentence."))
	})
})
// Edge-case specs that call Extract with no options: lone tags, closing-only
// tags, mismatched tag names, long bodies and non-ASCII text.
Context("edge cases without WithThinkingForcedOpen", func() {
	It("should handle content with only opening tag", func() {
		input := "<thinking>"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(BeEmpty())
		Expect(gotCleaned).To(Equal(""))
	})
	It("should handle content with only closing tag (no content before)", func() {
		input := "</thinking>"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(BeEmpty())
		Expect(gotCleaned).To(BeEmpty())
	})
	It("should extract reasoning when only closing tag is present", func() {
		// GLM-4 style output: the reasoning text is terminated by a closing
		// tag although no opening tag was emitted.
		input := "This is reasoning content</think>this is the actual response"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("This is reasoning content"))
		Expect(gotCleaned).To(Equal("this is the actual response"))
	})
	It("should handle closing-only tag with multiline reasoning", func() {
		input := "1. First point\n2. Second point\n3. Third point</think>Final answer"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
		Expect(gotCleaned).To(Equal("Final answer"))
	})
	It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() {
		input := "**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text."))
		Expect(gotCleaned).To(Equal("this is a test"))
	})
	It("should handle closing-only thinking tag", func() {
		input := "Some reasoning here</thinking>actual content"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Some reasoning here"))
		Expect(gotCleaned).To(Equal("actual content"))
	})
	It("should handle mismatched tags", func() {
		input := "<thinking>Content</think>"
		gotReasoning, gotCleaned := Extract(input)
		// The <thinking> block is expected to be treated as unclosed and extracted.
		Expect(gotReasoning).To(ContainSubstring("Content"))
		Expect(gotCleaned).To(Equal(""))
	})
	It("should handle very long reasoning content", func() {
		body := strings.Repeat("This is reasoning content. ", 100)
		input := "Text <thinking>" + body + "</thinking> More"
		gotReasoning, gotCleaned := Extract(input)
		// The generated body ends with a space, so the comparison is made
		// against its trimmed form.
		Expect(gotReasoning).To(Equal(strings.TrimSpace(body)))
		Expect(gotCleaned).To(Equal("Text More"))
	})
	It("should handle reasoning with unicode characters", func() {
		input := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
		Expect(gotCleaned).To(Equal("Text More"))
	})
})
// Specs that pass WithThinkingForcedOpen() to Extract: the text preceding the
// first closing tag is expected as reasoning even though the input carries no
// opening tag.
Context("with WithThinkingForcedOpen option", func() {
	It("should treat all content as reasoning until closing tag", func() {
		input := "This is reasoning</think>this is content"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("This is reasoning"))
		Expect(gotCleaned).To(Equal("this is content"))
	})
	It("should treat all content as reasoning when no closing tag (streaming)", func() {
		// No closing tag yet: the whole input is expected as reasoning.
		input := "This is reasoning content still streaming"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("This is reasoning content still streaming"))
		Expect(gotCleaned).To(BeEmpty())
	})
	It("should handle GLM-4 style output", func() {
		input := "**Analyze:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the text."))
		Expect(gotCleaned).To(Equal("this is a test"))
	})
	It("should handle multiline reasoning with closing tag", func() {
		input := "1. First point\n2. Second point\n3. Third point</think>Final answer"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
		Expect(gotCleaned).To(Equal("Final answer"))
	})
	It("should handle </thinking> closing tag", func() {
		input := "Some reasoning here</thinking>actual content"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("Some reasoning here"))
		Expect(gotCleaned).To(Equal("actual content"))
	})
	It("should handle additional reasoning blocks after initial forced open", func() {
		// A later, fully tagged block is expected to be appended to the
		// forced-open reasoning, separated by a blank line.
		input := "Initial reasoning</think>content<think>more reasoning</think>final content"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("Initial reasoning\n\nmore reasoning"))
		Expect(gotCleaned).To(Equal("contentfinal content"))
	})
	It("should handle empty content", func() {
		gotReasoning, gotCleaned := Extract("", WithThinkingForcedOpen())
		Expect(gotReasoning).To(BeEmpty())
		Expect(gotCleaned).To(BeEmpty())
	})
	It("should handle only closing tag", func() {
		input := "</think>only content"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(BeEmpty())
		Expect(gotCleaned).To(Equal("only content"))
	})
	It("should find earliest closing tag", func() {
		// </think> appears before </thinking>, so the split is expected at
		// </think> and the later </thinking> stays in the cleaned text.
		input := "Reasoning</think>content</thinking>more"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("Reasoning"))
		Expect(gotCleaned).To(Equal("content</thinking>more"))
	})
	It("should handle Command R7B closing tag", func() {
		input := "Reasoning content<|END_THINKING|>actual response"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("Reasoning content"))
		Expect(gotCleaned).To(Equal("actual response"))
	})
	It("should handle Apertus closing tag", func() {
		input := "Reasoning content<|inner_suffix|>actual response"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("Reasoning content"))
		Expect(gotCleaned).To(Equal("actual response"))
	})
	It("should handle Seed closing tag", func() {
		input := "Reasoning content</seed:think>actual response"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("Reasoning content"))
		Expect(gotCleaned).To(Equal("actual response"))
	})
	It("should handle Magistral closing tag", func() {
		input := "Reasoning content[/THINK]actual response"
		gotReasoning, gotCleaned := Extract(input, WithThinkingForcedOpen())
		Expect(gotReasoning).To(Equal("Reasoning content"))
		Expect(gotCleaned).To(Equal("actual response"))
	})
})
// Specs for the Command R7B, Apertus, Seed and Magistral tag pairs, each
// exercised in three shapes: complete pair, unclosed opening tag, and
// closing tag only.
Context("with model-specific tag pairs", func() {
	// Complete open/close pairs.
	It("should extract Command R7B reasoning tags", func() {
		input := "Before <|START_THINKING|>reasoning here<|END_THINKING|> After"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning here"))
		Expect(gotCleaned).To(Equal("Before After"))
	})
	It("should extract Apertus reasoning tags", func() {
		input := "Before <|inner_prefix|>reasoning here<|inner_suffix|> After"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning here"))
		Expect(gotCleaned).To(Equal("Before After"))
	})
	It("should extract Seed reasoning tags", func() {
		input := "Before <seed:think>reasoning here</seed:think> After"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning here"))
		Expect(gotCleaned).To(Equal("Before After"))
	})
	It("should extract Magistral reasoning tags", func() {
		input := "Before [THINK]reasoning here[/THINK] After"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning here"))
		Expect(gotCleaned).To(Equal("Before After"))
	})

	// Opening tag without a matching close.
	It("should handle unclosed Command R7B tag", func() {
		input := "Before <|START_THINKING|>reasoning still streaming"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning still streaming"))
		Expect(gotCleaned).To(Equal("Before "))
	})
	It("should handle unclosed Apertus tag", func() {
		input := "Before <|inner_prefix|>reasoning still streaming"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning still streaming"))
		Expect(gotCleaned).To(Equal("Before "))
	})
	It("should handle unclosed Seed tag", func() {
		input := "Before <seed:think>reasoning still streaming"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning still streaming"))
		Expect(gotCleaned).To(Equal("Before "))
	})
	It("should handle unclosed Magistral tag", func() {
		input := "Before [THINK]reasoning still streaming"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("reasoning still streaming"))
		Expect(gotCleaned).To(Equal("Before "))
	})

	// Closing tag with no opening tag before it.
	It("should handle closing-only Command R7B tag", func() {
		input := "Reasoning content<|END_THINKING|>actual response"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Reasoning content"))
		Expect(gotCleaned).To(Equal("actual response"))
	})
	It("should handle closing-only Apertus tag", func() {
		input := "Reasoning content<|inner_suffix|>actual response"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Reasoning content"))
		Expect(gotCleaned).To(Equal("actual response"))
	})
	It("should handle closing-only Seed tag", func() {
		input := "Reasoning content</seed:think>actual response"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Reasoning content"))
		Expect(gotCleaned).To(Equal("actual response"))
	})
	It("should handle closing-only Magistral tag", func() {
		input := "Reasoning content[/THINK]actual response"
		gotReasoning, gotCleaned := Extract(input)
		Expect(gotReasoning).To(Equal("Reasoning content"))
		Expect(gotCleaned).To(Equal("actual response"))
	})
})
})