mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-21 12:51:18 -05:00
* chore: extract reasoning to its own package Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * make sure we detect thinking tokens from template Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Allow to override via config, add tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
507 lines
18 KiB
Go
507 lines
18 KiB
Go
package reasoning_test
|
|
|
|
import (
|
|
"strings"
|
|
|
|
. "github.com/mudler/LocalAI/pkg/reasoning"
|
|
. "github.com/onsi/ginkgo/v2"
|
|
. "github.com/onsi/gomega"
|
|
)
|
|
|
|
var _ = Describe("ExtractReasoning", func() {
|
|
Context("when content has no reasoning tags", func() {
|
|
It("should return empty reasoning and original content", func() {
|
|
content := "This is regular content without any tags."
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(BeEmpty())
|
|
Expect(cleaned).To(Equal(content))
|
|
})
|
|
|
|
It("should handle empty string", func() {
|
|
content := ""
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(BeEmpty())
|
|
Expect(cleaned).To(BeEmpty())
|
|
})
|
|
|
|
It("should handle content with only whitespace", func() {
|
|
content := " \n\t "
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(BeEmpty())
|
|
Expect(cleaned).To(Equal(content))
|
|
})
|
|
})
|
|
|
|
Context("when content has <thinking> tags", func() {
|
|
It("should extract reasoning from single thinking block", func() {
|
|
content := "Some text <thinking>This is my reasoning</thinking> More text"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("This is my reasoning"))
|
|
Expect(cleaned).To(Equal("Some text More text"))
|
|
})
|
|
|
|
It("should extract reasoning and preserve surrounding content", func() {
|
|
content := "Before <thinking>Reasoning here</thinking> After"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Reasoning here"))
|
|
Expect(cleaned).To(Equal("Before After"))
|
|
})
|
|
|
|
It("should handle thinking block at the start", func() {
|
|
content := "<thinking>Start reasoning</thinking> Regular content"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Start reasoning"))
|
|
Expect(cleaned).To(Equal(" Regular content"))
|
|
})
|
|
|
|
It("should handle thinking block at the end", func() {
|
|
content := "Regular content <thinking>End reasoning</thinking>"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("End reasoning"))
|
|
Expect(cleaned).To(Equal("Regular content "))
|
|
})
|
|
|
|
It("should handle only thinking block", func() {
|
|
content := "<thinking>Only reasoning</thinking>"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Only reasoning"))
|
|
Expect(cleaned).To(BeEmpty())
|
|
})
|
|
|
|
It("should trim whitespace from reasoning content", func() {
|
|
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Reasoning with spaces"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
})
|
|
|
|
Context("when content has <think> tags", func() {
|
|
It("should extract reasoning from redacted_reasoning block", func() {
|
|
content := "Text <think>Redacted reasoning</think> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Redacted reasoning"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
|
|
It("should handle redacted_reasoning with multiline content", func() {
|
|
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
|
Expect(cleaned).To(Equal("Before After"))
|
|
})
|
|
|
|
It("should handle redacted_reasoning with complex content", func() {
|
|
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
|
|
Expect(cleaned).To(Equal("Start End"))
|
|
})
|
|
})
|
|
|
|
Context("when content has multiple reasoning blocks", func() {
|
|
It("should concatenate multiple thinking blocks with newlines", func() {
|
|
content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("First\n\nSecond"))
|
|
Expect(cleaned).To(Equal("Text Middle End"))
|
|
})
|
|
|
|
It("should handle multiple different tag types", func() {
|
|
content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(ContainSubstring("One"))
|
|
Expect(reasoning).To(ContainSubstring("Two"))
|
|
Expect(reasoning).To(ContainSubstring("Three"))
|
|
Expect(cleaned).To(Equal("A B C D"))
|
|
})
|
|
|
|
It("should handle nested tags correctly (extracts first match)", func() {
|
|
content := "Text <thinking>Outer <think>Inner</think></thinking> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
// Should extract the outer thinking block
|
|
Expect(reasoning).To(ContainSubstring("Outer"))
|
|
Expect(reasoning).To(ContainSubstring("Inner"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
})
|
|
|
|
Context("when content has unclosed reasoning tags", func() {
|
|
It("should extract unclosed thinking block", func() {
|
|
content := "Text <thinking>Unclosed reasoning"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Unclosed reasoning"))
|
|
Expect(cleaned).To(Equal("Text "))
|
|
})
|
|
|
|
It("should extract unclosed think block", func() {
|
|
content := "Before <think>Incomplete"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Incomplete"))
|
|
Expect(cleaned).To(Equal("Before "))
|
|
})
|
|
|
|
It("should extract unclosed redacted_reasoning block", func() {
|
|
content := "Start <think>Partial reasoning content"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Partial reasoning content"))
|
|
Expect(cleaned).To(Equal("Start "))
|
|
})
|
|
|
|
It("should handle unclosed tag at the end", func() {
|
|
content := "Regular content <thinking>Unclosed at end"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Unclosed at end"))
|
|
Expect(cleaned).To(Equal("Regular content "))
|
|
})
|
|
})
|
|
|
|
Context("when content has empty reasoning blocks", func() {
|
|
It("should ignore empty thinking block", func() {
|
|
content := "Text <thinking></thinking> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(BeEmpty())
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
|
|
It("should ignore thinking block with only whitespace", func() {
|
|
content := "Text <thinking> \n\t </thinking> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(BeEmpty())
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
})
|
|
|
|
Context("when content has reasoning tags with special characters", func() {
|
|
It("should handle reasoning with newlines", func() {
|
|
content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
|
Expect(cleaned).To(Equal("Before After"))
|
|
})
|
|
|
|
It("should handle reasoning with code blocks", func() {
|
|
content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
|
|
It("should handle reasoning with JSON", func() {
|
|
content := "Before <think>{\"key\": \"value\"}</think> After"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
|
|
Expect(cleaned).To(Equal("Before After"))
|
|
})
|
|
|
|
It("should handle reasoning with HTML-like content", func() {
|
|
content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
})
|
|
|
|
Context("when content has reasoning mixed with regular content", func() {
|
|
It("should preserve content order correctly", func() {
|
|
content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(ContainSubstring("Reasoning"))
|
|
Expect(reasoning).To(ContainSubstring("More reasoning"))
|
|
Expect(cleaned).To(Equal("Start Middle End"))
|
|
})
|
|
|
|
It("should handle reasoning in the middle of a sentence", func() {
|
|
content := "This is a <thinking>reasoning</thinking> sentence."
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("reasoning"))
|
|
Expect(cleaned).To(Equal("This is a sentence."))
|
|
})
|
|
})
|
|
|
|
Context("edge cases", func() {
|
|
It("should handle content with only opening tag", func() {
|
|
content := "<thinking>"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(BeEmpty())
|
|
Expect(cleaned).To(Equal(""))
|
|
})
|
|
|
|
It("should handle content with only closing tag", func() {
|
|
content := "</thinking>"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(BeEmpty())
|
|
Expect(cleaned).To(Equal("</thinking>"))
|
|
})
|
|
|
|
It("should handle mismatched tags", func() {
|
|
content := "<thinking>Content</think>"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
// Should extract unclosed thinking block
|
|
Expect(reasoning).To(ContainSubstring("Content"))
|
|
Expect(cleaned).To(Equal(""))
|
|
})
|
|
|
|
It("should handle very long reasoning content", func() {
|
|
longReasoning := strings.Repeat("This is reasoning content. ", 100)
|
|
content := "Text <thinking>" + longReasoning + "</thinking> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
// TrimSpace is applied, so we need to account for that
|
|
Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
|
|
It("should handle reasoning with unicode characters", func() {
|
|
content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
})
|
|
|
|
Context("when content has <|START_THINKING|> tags (Command-R)", func() {
|
|
It("should extract reasoning from START_THINKING block", func() {
|
|
content := "Text <|START_THINKING|>Command-R reasoning<|END_THINKING|> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Command-R reasoning"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
|
|
It("should handle unclosed START_THINKING block", func() {
|
|
content := "Before <|START_THINKING|>Incomplete reasoning"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Incomplete reasoning"))
|
|
Expect(cleaned).To(Equal("Before "))
|
|
})
|
|
})
|
|
|
|
Context("when content has <|inner_prefix|> tags (Apertus)", func() {
|
|
It("should extract reasoning from inner_prefix block", func() {
|
|
content := "Text <|inner_prefix|>Apertus reasoning<|inner_suffix|> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Apertus reasoning"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
})
|
|
|
|
Context("when content has <seed:think> tags (Seed)", func() {
|
|
It("should extract reasoning from seed:think block", func() {
|
|
content := "Text <seed:think>Seed reasoning</seed:think> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Seed reasoning"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
})
|
|
|
|
Context("when content has <|think|> tags (Solar Open)", func() {
|
|
It("should extract reasoning from Solar Open think block", func() {
|
|
content := "Text <|think|>Solar reasoning<|end|><|begin|>assistant<|content|> More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Solar reasoning"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
})
|
|
|
|
Context("when content has [THINK] tags (Magistral)", func() {
|
|
It("should extract reasoning from THINK block", func() {
|
|
content := "Text [THINK]Magistral reasoning[/THINK] More"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Magistral reasoning"))
|
|
Expect(cleaned).To(Equal("Text More"))
|
|
})
|
|
|
|
It("should handle unclosed THINK block", func() {
|
|
content := "Before [THINK]Incomplete reasoning"
|
|
reasoning, cleaned := ExtractReasoning(content)
|
|
Expect(reasoning).To(Equal("Incomplete reasoning"))
|
|
Expect(cleaned).To(Equal("Before "))
|
|
})
|
|
})
|
|
})
|
|
|
|
var _ = Describe("DetectThinkingStartToken", func() {
|
|
Context("when prompt contains thinking start tokens", func() {
|
|
It("should detect <|START_THINKING|> at the end", func() {
|
|
prompt := "Some prompt text <|START_THINKING|>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<|START_THINKING|>"))
|
|
})
|
|
|
|
It("should detect <think> at the end", func() {
|
|
prompt := "Prompt with <think>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<think>"))
|
|
})
|
|
|
|
It("should detect <thinking> at the end", func() {
|
|
prompt := "Some text <thinking>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<thinking>"))
|
|
})
|
|
|
|
It("should detect <|inner_prefix|> at the end", func() {
|
|
prompt := "Prompt <|inner_prefix|>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<|inner_prefix|>"))
|
|
})
|
|
|
|
It("should detect <seed:think> at the end", func() {
|
|
prompt := "Text <seed:think>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<seed:think>"))
|
|
})
|
|
|
|
It("should detect <|think|> at the end", func() {
|
|
prompt := "Prompt <|think|>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<|think|>"))
|
|
})
|
|
|
|
It("should detect [THINK] at the end", func() {
|
|
prompt := "Text [THINK]"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("[THINK]"))
|
|
})
|
|
|
|
It("should handle trailing whitespace", func() {
|
|
prompt := "Prompt <|START_THINKING|> \n\t "
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<|START_THINKING|>"))
|
|
})
|
|
|
|
It("should detect token near the end (within last 100 chars)", func() {
|
|
prefix := strings.Repeat("x", 50)
|
|
prompt := prefix + "<|START_THINKING|>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<|START_THINKING|>"))
|
|
})
|
|
|
|
It("should detect token when followed by only whitespace", func() {
|
|
prompt := "Text <think> \n "
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(Equal("<think>"))
|
|
})
|
|
})
|
|
|
|
Context("when prompt does not contain thinking tokens", func() {
|
|
It("should return empty string for regular prompt", func() {
|
|
prompt := "This is a regular prompt without thinking tokens"
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(BeEmpty())
|
|
})
|
|
|
|
It("should return empty string for empty prompt", func() {
|
|
prompt := ""
|
|
token := DetectThinkingStartToken(prompt)
|
|
Expect(token).To(BeEmpty())
|
|
})
|
|
|
|
It("should detect token even when far from end (Contains check)", func() {
|
|
prefix := strings.Repeat("x", 150)
|
|
prompt := prefix + "<|START_THINKING|>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
// Current implementation uses Contains, so it finds tokens anywhere
|
|
Expect(token).To(Equal("<|START_THINKING|>"))
|
|
})
|
|
|
|
It("should detect token even when followed by non-whitespace (Contains check)", func() {
|
|
prompt := "Text <|START_THINKING|>more text"
|
|
token := DetectThinkingStartToken(prompt)
|
|
// Current implementation uses Contains, so it finds tokens anywhere
|
|
Expect(token).To(Equal("<|START_THINKING|>"))
|
|
})
|
|
})
|
|
|
|
Context("when multiple tokens are present", func() {
|
|
It("should return the first matching token (most specific)", func() {
|
|
prompt := "Text <|START_THINKING|> <thinking>"
|
|
token := DetectThinkingStartToken(prompt)
|
|
// Should return the first one found (order matters)
|
|
Expect(token).To(Equal("<|START_THINKING|>"))
|
|
})
|
|
})
|
|
})
|
|
|
|
var _ = Describe("PrependThinkingTokenIfNeeded", func() {
|
|
Context("when startToken is empty", func() {
|
|
It("should return content unchanged", func() {
|
|
content := "Some content"
|
|
result := PrependThinkingTokenIfNeeded(content, "")
|
|
Expect(result).To(Equal(content))
|
|
})
|
|
})
|
|
|
|
Context("when content already starts with token", func() {
|
|
It("should not prepend if content starts with token", func() {
|
|
content := "<|START_THINKING|>Reasoning content"
|
|
result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
|
|
Expect(result).To(Equal(content))
|
|
})
|
|
|
|
It("should not prepend if content starts with token after whitespace", func() {
|
|
content := " <think>Reasoning"
|
|
result := PrependThinkingTokenIfNeeded(content, "<think>")
|
|
Expect(result).To(Equal(content))
|
|
})
|
|
|
|
It("should not prepend if token appears anywhere in content", func() {
|
|
content := "Some text <thinking>Reasoning</thinking>"
|
|
result := PrependThinkingTokenIfNeeded(content, "<thinking>")
|
|
// With Contains check, it should not prepend
|
|
Expect(result).To(Equal(content))
|
|
})
|
|
})
|
|
|
|
Context("when content does not contain token", func() {
|
|
It("should prepend token to content", func() {
|
|
content := "Reasoning content"
|
|
result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
|
|
Expect(result).To(Equal("<|START_THINKING|>Reasoning content"))
|
|
})
|
|
|
|
It("should prepend token after leading whitespace", func() {
|
|
content := " \n Reasoning content"
|
|
result := PrependThinkingTokenIfNeeded(content, "<think>")
|
|
Expect(result).To(Equal(" \n <think>Reasoning content"))
|
|
})
|
|
|
|
It("should handle empty content", func() {
|
|
content := ""
|
|
result := PrependThinkingTokenIfNeeded(content, "<thinking>")
|
|
Expect(result).To(Equal("<thinking>"))
|
|
})
|
|
|
|
It("should handle content with only whitespace", func() {
|
|
content := " \n\t "
|
|
result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
|
|
Expect(result).To(Equal(" \n\t <|START_THINKING|>"))
|
|
})
|
|
})
|
|
|
|
Context("with different token types", func() {
|
|
It("should prepend <|START_THINKING|>", func() {
|
|
content := "Reasoning"
|
|
result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
|
|
Expect(result).To(Equal("<|START_THINKING|>Reasoning"))
|
|
})
|
|
|
|
It("should prepend <think>", func() {
|
|
content := "Reasoning"
|
|
result := PrependThinkingTokenIfNeeded(content, "<think>")
|
|
Expect(result).To(Equal("<think>Reasoning"))
|
|
})
|
|
|
|
It("should prepend <thinking>", func() {
|
|
content := "Reasoning"
|
|
result := PrependThinkingTokenIfNeeded(content, "<thinking>")
|
|
Expect(result).To(Equal("<thinking>Reasoning"))
|
|
})
|
|
|
|
It("should prepend [THINK]", func() {
|
|
content := "Reasoning"
|
|
result := PrependThinkingTokenIfNeeded(content, "[THINK]")
|
|
Expect(result).To(Equal("[THINK]Reasoning"))
|
|
})
|
|
})
|
|
})
|