mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-21 12:51:18 -05:00
fix(reasoning): support models with reasoning without starting thinking tag (#8132)
* chore: extract reasoning to its own package Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * make sure we detect thinking tokens from template Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Allow to override via config, add tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
e886bb291a
commit
34e054f607
@@ -62,16 +62,23 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
|
||||
cfg.NGPULayers = &defaultHigh
|
||||
}
|
||||
|
||||
xlog.Debug("guessDefaultsFromFile: NGPULayers set", "NGPULayers", cfg.NGPULayers)
|
||||
xlog.Debug("[gguf] guessDefaultsFromFile: NGPULayers set", "NGPULayers", cfg.NGPULayers, "modelName", f.Metadata().Name)
|
||||
|
||||
// identify from well known templates first, otherwise use the raw jinja template
|
||||
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
|
||||
if found {
|
||||
// fill jinja template
|
||||
cfg.modelTemplate = chatTemplate.ValueString()
|
||||
}
|
||||
|
||||
// template estimations
|
||||
if cfg.HasTemplate() {
|
||||
// nothing to guess here
|
||||
xlog.Debug("guessDefaultsFromFile: template already set", "name", cfg.Name)
|
||||
xlog.Debug("[gguf] guessDefaultsFromFile: template already set", "name", cfg.Name, "modelName", f.Metadata().Name)
|
||||
return
|
||||
}
|
||||
|
||||
xlog.Debug("Model file loaded", "file", cfg.ModelFileName(), "eosTokenID", f.Tokenizer().EOSTokenID, "bosTokenID", f.Tokenizer().BOSTokenID, "modelName", f.Metadata().Name, "architecture", f.Architecture().Architecture)
|
||||
xlog.Debug("[gguf] Model file loaded", "file", cfg.ModelFileName(), "eosTokenID", f.Tokenizer().EOSTokenID, "bosTokenID", f.Tokenizer().BOSTokenID, "modelName", f.Metadata().Name, "architecture", f.Architecture().Architecture)
|
||||
|
||||
// guess the name
|
||||
if cfg.Name == "" {
|
||||
@@ -83,4 +90,5 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
|
||||
cfg.FunctionsConfig.GrammarConfig.NoGrammar = true
|
||||
cfg.Options = append(cfg.Options, "use_jinja:true")
|
||||
cfg.KnownUsecaseStrings = append(cfg.KnownUsecaseStrings, "FLAG_CHAT")
|
||||
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/downloader"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||
"github.com/mudler/cogito"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
@@ -30,6 +31,7 @@ type TTSConfig struct {
|
||||
// @Description ModelConfig represents a model configuration
|
||||
type ModelConfig struct {
|
||||
modelConfigFile string `yaml:"-" json:"-"`
|
||||
modelTemplate string `yaml:"-" json:"-"`
|
||||
schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"`
|
||||
Name string `yaml:"name,omitempty" json:"name,omitempty"`
|
||||
|
||||
@@ -51,6 +53,7 @@ type ModelConfig struct {
|
||||
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
|
||||
|
||||
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
|
||||
ReasoningConfig reasoning.Config `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
|
||||
|
||||
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
|
||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||
@@ -521,6 +524,11 @@ func (c *ModelConfig) GetModelConfigFile() string {
|
||||
return c.modelConfigFile
|
||||
}
|
||||
|
||||
// GetModelTemplate returns the chat template string held in the unexported
// modelTemplate field of the config. The accessor performs no lookup or
// parsing of its own: if nothing has assigned the field, the result is the
// empty string (the zero value).
|
||||
func (c *ModelConfig) GetModelTemplate() string {
|
||||
return c.modelTemplate
|
||||
}
|
||||
|
||||
type ModelConfigUsecase int
|
||||
|
||||
const (
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
reason "github.com/mudler/LocalAI/pkg/reasoning"
|
||||
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -38,6 +39,16 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
}
|
||||
responses <- initialMessage
|
||||
|
||||
// Detect if thinking token is already in prompt or template
|
||||
// When UseTokenizerTemplate is enabled, predInput is empty, so we check the template
|
||||
var template string
|
||||
if config.TemplateConfig.UseTokenizerTemplate {
|
||||
template = config.GetModelTemplate()
|
||||
} else {
|
||||
template = s
|
||||
}
|
||||
thinkingStartToken := reason.DetectThinkingStartToken(template)
|
||||
|
||||
// Track accumulated content for reasoning extraction
|
||||
accumulatedContent := ""
|
||||
lastEmittedReasoning := ""
|
||||
@@ -45,8 +56,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
|
||||
_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
||||
accumulatedContent += s
|
||||
// Extract reasoning from accumulated content
|
||||
currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
|
||||
content := accumulatedContent
|
||||
// Prepend thinking token if needed, then extract reasoning
|
||||
if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
|
||||
content = reason.PrependThinkingTokenIfNeeded(content, thinkingStartToken)
|
||||
}
|
||||
currentReasoning, cleanedContent := reason.ExtractReasoning(content)
|
||||
|
||||
// Calculate new reasoning delta (what we haven't emitted yet)
|
||||
var reasoningDelta *string
|
||||
@@ -118,6 +133,15 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
return err
|
||||
}
|
||||
processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.ModelConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) error {
|
||||
// Detect if thinking token is already in prompt or template
|
||||
var template string
|
||||
if config.TemplateConfig.UseTokenizerTemplate {
|
||||
template = config.GetModelTemplate()
|
||||
} else {
|
||||
template = prompt
|
||||
}
|
||||
thinkingStartToken := reason.DetectThinkingStartToken(template)
|
||||
|
||||
result := ""
|
||||
lastEmittedCount := 0
|
||||
_, tokenUsage, err := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
@@ -229,8 +253,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Extract reasoning before processing tool calls
|
||||
reasoning, cleanedResult := functions.ExtractReasoning(result)
|
||||
// Prepend thinking token if needed, then extract reasoning before processing tool calls
|
||||
resultWithToken := result
|
||||
if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
|
||||
resultWithToken = reason.PrependThinkingTokenIfNeeded(result, thinkingStartToken)
|
||||
}
|
||||
reasoning, cleanedResult := reason.ExtractReasoning(resultWithToken)
|
||||
result = cleanedResult
|
||||
|
||||
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
|
||||
@@ -617,10 +645,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
|
||||
// no streaming mode
|
||||
default:
|
||||
// Detect if thinking token is already in prompt or template
|
||||
var template string
|
||||
if config.TemplateConfig.UseTokenizerTemplate {
|
||||
template = config.GetModelTemplate() // TODO: this should be the parsed jinja template. But for now this is the best we can do.
|
||||
} else {
|
||||
template = predInput
|
||||
}
|
||||
thinkingStartToken := reason.DetectThinkingStartToken(template)
|
||||
|
||||
xlog.Debug("Thinking start token", "thinkingStartToken", thinkingStartToken, "template", template)
|
||||
|
||||
tokenCallback := func(s string, c *[]schema.Choice) {
|
||||
// Extract reasoning from the response
|
||||
reasoning, cleanedS := functions.ExtractReasoning(s)
|
||||
// Prepend thinking token if needed, then extract reasoning from the response
|
||||
sWithToken := s
|
||||
if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
|
||||
sWithToken = reason.PrependThinkingTokenIfNeeded(s, thinkingStartToken)
|
||||
}
|
||||
reasoning, cleanedS := reason.ExtractReasoning(sWithToken)
|
||||
s = cleanedS
|
||||
|
||||
if !shouldUseFn {
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
package functions
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ExtractReasoning separates model "thinking" from the user-visible answer.
//
// It scans content for <thinking>...</thinking> and <think>...</think>
// blocks, always consuming whichever block opens earliest after the previous
// one. The text inside each block is whitespace-trimmed and, when non-empty,
// collected as reasoning; the collected pieces are joined with a blank line
// ("\n\n"). Everything outside the blocks is concatenated unchanged
// (surrounding whitespace included) into cleanedContent.
//
// An opening tag without a matching closing tag swallows the rest of the
// input as reasoning. Empty input yields two empty strings.
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
	if len(content) == 0 {
		return "", content
	}

	// Opening/closing delimiter pairs, in lookup order.
	delimiters := [][2]string{
		{"<thinking>", "</thinking>"},
		{"<think>", "</think>"},
	}

	var (
		thoughts []string
		visible  strings.Builder
	)

	pos := 0
	for {
		// openAt: index of the earliest opening tag at or after pos (-1 if none).
		// bodyFrom/bodyTo: bounds of the reasoning text inside that block.
		// next: index at which scanning resumes after the block.
		openAt, bodyFrom, bodyTo, next := -1, 0, 0, 0

		for _, d := range delimiters {
			hit := strings.Index(content[pos:], d[0])
			if hit < 0 {
				continue
			}
			hit += pos
			if openAt >= 0 && hit >= openAt {
				// A block opening earlier is already selected.
				continue
			}

			openAt = hit
			bodyFrom = hit + len(d[0])
			if rel := strings.Index(content[bodyFrom:], d[1]); rel < 0 {
				// Unclosed block: the reasoning runs to the end of the input.
				bodyTo, next = len(content), len(content)
			} else {
				bodyTo = bodyFrom + rel
				next = bodyTo + len(d[1])
			}
		}

		if openAt < 0 {
			// No further blocks: the tail is plain content.
			visible.WriteString(content[pos:])
			break
		}

		visible.WriteString(content[pos:openAt])
		if t := strings.TrimSpace(content[bodyFrom:bodyTo]); t != "" {
			thoughts = append(thoughts, t)
		}
		pos = next
	}

	return strings.Join(thoughts, "\n\n"), visible.String()
}
|
||||
@@ -393,7 +393,7 @@ func (wd *WatchDog) checkMemory() {
|
||||
memoryType = "RAM"
|
||||
}
|
||||
|
||||
xlog.Debug("[WatchDog] Memory check", "type", memoryType, "usage_percent", aggregate.UsagePercent, "threshold_percent", thresholdPercent, "loaded_models", modelCount)
|
||||
//xlog.Debug("[WatchDog] Memory check", "type", memoryType, "usage_percent", aggregate.UsagePercent, "threshold_percent", thresholdPercent, "loaded_models", modelCount)
|
||||
|
||||
// Check if usage exceeds threshold
|
||||
if aggregate.UsagePercent > thresholdPercent {
|
||||
|
||||
5
pkg/reasoning/config.go
Normal file
5
pkg/reasoning/config.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package reasoning
|
||||
|
||||
// Config holds the per-model reasoning options that can be set under the
// "reasoning" key of a model configuration (YAML or JSON).
type Config struct {
|
||||
// DisableReasoningTagPrefill is an optional switch (pointer so that "unset"
// can be told apart from an explicit false). It is serialized as
// "disable_reasoning_tag_prefill" and omitted when nil.
// NOTE(review): callers appear to treat nil and false alike, i.e. the
// thinking start tag is prepended to the model output unless this is
// explicitly true — confirm against the chat endpoint.
DisableReasoningTagPrefill *bool `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"`
|
||||
}
|
||||
208
pkg/reasoning/reasoning.go
Normal file
208
pkg/reasoning/reasoning.go
Normal file
@@ -0,0 +1,208 @@
|
||||
package reasoning
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DetectThinkingStartToken reports which known thinking start token, if any,
// occurs in the given prompt or chat template.
//
// A match indicates that the prompt/template already carries the opening
// thinking tag, so the model output is expected to begin with reasoning
// content without an explicit opening tag of its own.
//
// The token may sit anywhere in the input, not only at its end. When several
// known tokens are present, the one listed first below is returned,
// regardless of where each of them appears in the input.
//
// Tokens checked, in priority order (based on llama.cpp's chat-parser.cpp):
//   - <|START_THINKING|> (Command-R models)
//   - <|inner_prefix|> (Apertus models)
//   - <seed:think> (Seed models)
//   - <think> (DeepSeek, Granite, ExaOne, GLM models)
//   - <|think|> (Solar Open models)
//   - <thinking> (General thinking tag)
//   - [THINK] (Magistral models)
//
// It returns the detected token, or an empty string when none is present.
func DetectThinkingStartToken(prompt string) string {
	// Priority order: the first token found in the prompt wins.
	thinkingStartTokens := []string{
		"<|START_THINKING|>", // Command-R models
		"<|inner_prefix|>",   // Apertus models
		"<seed:think>",       // Seed models
		"<think>",            // DeepSeek, Granite, ExaOne, GLM models
		"<|think|>",          // Solar Open models
		"<thinking>",         // General thinking tag
		"[THINK]",            // Magistral models
	}

	// A plain substring search is sufficient. None of the tokens contains
	// whitespace, so trimming the prompt cannot change the outcome, and a
	// token that is absent from the whole prompt cannot be present in any
	// suffix of it (which made a separate "near the end" pass redundant).
	for _, token := range thinkingStartTokens {
		if strings.Contains(prompt, token) {
			return token
		}
	}

	return ""
}
|
||||
|
||||
// PrependThinkingTokenIfNeeded re-inserts the thinking start token in front
// of model output whose opening tag was already consumed by the prompt, so
// that the regular tag-based extraction can process it.
//
// The content is returned untouched when startToken is empty or when the
// token already occurs anywhere in the content. Otherwise the token is
// inserted right after any leading spaces, tabs, newlines and carriage
// returns, which are kept in place.
func PrependThinkingTokenIfNeeded(content string, startToken string) string {
	if len(startToken) == 0 {
		return content
	}

	// Split the content into its leading blank run and the remainder.
	const blanks = " \t\n\r"
	body := strings.TrimLeft(content, blanks)

	// The token is already there (at the front or further in): nothing to add.
	if strings.Contains(body, startToken) {
		return content
	}

	lead := content[:len(content)-len(body)]
	return lead + startToken + body
}
|
||||
|
||||
// ExtractReasoning splits content into the model's reasoning and the
// remaining user-visible text.
//
// Reasoning blocks are delimited by any of the tag pairs listed in the
// function body (matching llama.cpp's chat-parser.cpp). Blocks are consumed
// in order of appearance: at every step the block whose opening tag comes
// first in the not-yet-processed input is taken. The text inside a block is
// whitespace-trimmed and, if non-empty, kept as one reasoning part; parts are
// joined with a blank line ("\n\n"). Text outside the blocks is concatenated
// unchanged, including surrounding whitespace, into cleanedContent.
//
// An opening tag with no matching closing tag treats the rest of the input
// as reasoning. Empty input yields two empty strings.
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
	if len(content) == 0 {
		return "", content
	}

	type tagPair struct {
		open  string
		close string
	}
	pairs := [...]tagPair{
		{"<|START_THINKING|>", "<|END_THINKING|>"},             // Command-R models
		{"<|inner_prefix|>", "<|inner_suffix|>"},               // Apertus models
		{"<seed:think>", "</seed:think>"},                      // Seed models
		{"<think>", "</think>"},                                // DeepSeek, Granite, ExaOne models
		{"<|think|>", "<|end|><|begin|>assistant<|content|>"}, // Solar Open models (complex end)
		{"<thinking>", "</thinking>"},                          // General thinking tag
		{"[THINK]", "[/THINK]"},                                // Magistral models
	}

	var (
		thoughts []string
		visible  strings.Builder
	)

	cursor := 0 // everything before cursor has been processed
	for {
		// Select the block that opens first at or after cursor.
		blockStart, blockEnd := -1, -1
		innerStart, innerEnd := 0, 0

		for _, p := range pairs {
			rel := strings.Index(content[cursor:], p.open)
			if rel < 0 {
				continue
			}
			openAt := cursor + rel
			if blockStart != -1 && openAt >= blockStart {
				// An earlier-opening block has already been selected.
				continue
			}

			blockStart = openAt
			innerStart = openAt + len(p.open)
			if closeRel := strings.Index(content[innerStart:], p.close); closeRel >= 0 {
				innerEnd = innerStart + closeRel
				blockEnd = innerEnd + len(p.close)
			} else {
				// Unclosed block: the reasoning extends to the end of the input.
				innerEnd = len(content)
				blockEnd = len(content)
			}
		}

		if blockStart < 0 {
			// No block left; whatever remains is plain content.
			visible.WriteString(content[cursor:])
			break
		}

		// Keep the text that precedes the block, then record its reasoning.
		visible.WriteString(content[cursor:blockStart])
		if thought := strings.TrimSpace(content[innerStart:innerEnd]); thought != "" {
			thoughts = append(thoughts, thought)
		}

		cursor = blockEnd
	}

	return strings.Join(thoughts, "\n\n"), visible.String()
}
|
||||
13
pkg/reasoning/reasoning_suite_test.go
Normal file
13
pkg/reasoning/reasoning_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package reasoning_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// TestReasoning is the go test entry point for this package's Ginkgo specs:
// it wires Gomega assertion failures to Ginkgo's Fail handler and then runs
// the registered specs under the suite name "Reasoning test suite".
func TestReasoning(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Reasoning test suite")
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
package functions_test
|
||||
package reasoning_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
. "github.com/mudler/LocalAI/pkg/functions"
|
||||
. "github.com/mudler/LocalAI/pkg/reasoning"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
@@ -258,4 +258,249 @@ var _ = Describe("ExtractReasoning", func() {
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has <|START_THINKING|> tags (Command-R)", func() {
|
||||
It("should extract reasoning from START_THINKING block", func() {
|
||||
content := "Text <|START_THINKING|>Command-R reasoning<|END_THINKING|> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Command-R reasoning"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should handle unclosed START_THINKING block", func() {
|
||||
content := "Before <|START_THINKING|>Incomplete reasoning"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Incomplete reasoning"))
|
||||
Expect(cleaned).To(Equal("Before "))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has <|inner_prefix|> tags (Apertus)", func() {
|
||||
It("should extract reasoning from inner_prefix block", func() {
|
||||
content := "Text <|inner_prefix|>Apertus reasoning<|inner_suffix|> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Apertus reasoning"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has <seed:think> tags (Seed)", func() {
|
||||
It("should extract reasoning from seed:think block", func() {
|
||||
content := "Text <seed:think>Seed reasoning</seed:think> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Seed reasoning"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has <|think|> tags (Solar Open)", func() {
|
||||
It("should extract reasoning from Solar Open think block", func() {
|
||||
content := "Text <|think|>Solar reasoning<|end|><|begin|>assistant<|content|> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Solar reasoning"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has [THINK] tags (Magistral)", func() {
|
||||
It("should extract reasoning from THINK block", func() {
|
||||
content := "Text [THINK]Magistral reasoning[/THINK] More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Magistral reasoning"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should handle unclosed THINK block", func() {
|
||||
content := "Before [THINK]Incomplete reasoning"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Incomplete reasoning"))
|
||||
Expect(cleaned).To(Equal("Before "))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("DetectThinkingStartToken", func() {
|
||||
Context("when prompt contains thinking start tokens", func() {
|
||||
It("should detect <|START_THINKING|> at the end", func() {
|
||||
prompt := "Some prompt text <|START_THINKING|>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<|START_THINKING|>"))
|
||||
})
|
||||
|
||||
It("should detect <think> at the end", func() {
|
||||
prompt := "Prompt with <think>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<think>"))
|
||||
})
|
||||
|
||||
It("should detect <thinking> at the end", func() {
|
||||
prompt := "Some text <thinking>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<thinking>"))
|
||||
})
|
||||
|
||||
It("should detect <|inner_prefix|> at the end", func() {
|
||||
prompt := "Prompt <|inner_prefix|>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<|inner_prefix|>"))
|
||||
})
|
||||
|
||||
It("should detect <seed:think> at the end", func() {
|
||||
prompt := "Text <seed:think>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<seed:think>"))
|
||||
})
|
||||
|
||||
It("should detect <|think|> at the end", func() {
|
||||
prompt := "Prompt <|think|>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<|think|>"))
|
||||
})
|
||||
|
||||
It("should detect [THINK] at the end", func() {
|
||||
prompt := "Text [THINK]"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("[THINK]"))
|
||||
})
|
||||
|
||||
It("should handle trailing whitespace", func() {
|
||||
prompt := "Prompt <|START_THINKING|> \n\t "
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<|START_THINKING|>"))
|
||||
})
|
||||
|
||||
It("should detect token near the end (within last 100 chars)", func() {
|
||||
prefix := strings.Repeat("x", 50)
|
||||
prompt := prefix + "<|START_THINKING|>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<|START_THINKING|>"))
|
||||
})
|
||||
|
||||
It("should detect token when followed by only whitespace", func() {
|
||||
prompt := "Text <think> \n "
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(Equal("<think>"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when prompt does not contain thinking tokens", func() {
|
||||
It("should return empty string for regular prompt", func() {
|
||||
prompt := "This is a regular prompt without thinking tokens"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should return empty string for empty prompt", func() {
|
||||
prompt := ""
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
Expect(token).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should detect token even when far from end (Contains check)", func() {
|
||||
prefix := strings.Repeat("x", 150)
|
||||
prompt := prefix + "<|START_THINKING|>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
// Current implementation uses Contains, so it finds tokens anywhere
|
||||
Expect(token).To(Equal("<|START_THINKING|>"))
|
||||
})
|
||||
|
||||
It("should detect token even when followed by non-whitespace (Contains check)", func() {
|
||||
prompt := "Text <|START_THINKING|>more text"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
// Current implementation uses Contains, so it finds tokens anywhere
|
||||
Expect(token).To(Equal("<|START_THINKING|>"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when multiple tokens are present", func() {
|
||||
It("should return the first matching token (most specific)", func() {
|
||||
prompt := "Text <|START_THINKING|> <thinking>"
|
||||
token := DetectThinkingStartToken(prompt)
|
||||
// Should return the first one found (order matters)
|
||||
Expect(token).To(Equal("<|START_THINKING|>"))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("PrependThinkingTokenIfNeeded", func() {
|
||||
Context("when startToken is empty", func() {
|
||||
It("should return content unchanged", func() {
|
||||
content := "Some content"
|
||||
result := PrependThinkingTokenIfNeeded(content, "")
|
||||
Expect(result).To(Equal(content))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content already starts with token", func() {
|
||||
It("should not prepend if content starts with token", func() {
|
||||
content := "<|START_THINKING|>Reasoning content"
|
||||
result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
|
||||
Expect(result).To(Equal(content))
|
||||
})
|
||||
|
||||
It("should not prepend if content starts with token after whitespace", func() {
|
||||
content := " <think>Reasoning"
|
||||
result := PrependThinkingTokenIfNeeded(content, "<think>")
|
||||
Expect(result).To(Equal(content))
|
||||
})
|
||||
|
||||
It("should not prepend if token appears anywhere in content", func() {
|
||||
content := "Some text <thinking>Reasoning</thinking>"
|
||||
result := PrependThinkingTokenIfNeeded(content, "<thinking>")
|
||||
// With Contains check, it should not prepend
|
||||
Expect(result).To(Equal(content))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content does not contain token", func() {
|
||||
It("should prepend token to content", func() {
|
||||
content := "Reasoning content"
|
||||
result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
|
||||
Expect(result).To(Equal("<|START_THINKING|>Reasoning content"))
|
||||
})
|
||||
|
||||
It("should prepend token after leading whitespace", func() {
|
||||
content := " \n Reasoning content"
|
||||
result := PrependThinkingTokenIfNeeded(content, "<think>")
|
||||
Expect(result).To(Equal(" \n <think>Reasoning content"))
|
||||
})
|
||||
|
||||
It("should handle empty content", func() {
|
||||
content := ""
|
||||
result := PrependThinkingTokenIfNeeded(content, "<thinking>")
|
||||
Expect(result).To(Equal("<thinking>"))
|
||||
})
|
||||
|
||||
It("should handle content with only whitespace", func() {
|
||||
content := " \n\t "
|
||||
result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
|
||||
Expect(result).To(Equal(" \n\t <|START_THINKING|>"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("with different token types", func() {
|
||||
It("should prepend <|START_THINKING|>", func() {
|
||||
content := "Reasoning"
|
||||
result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
|
||||
Expect(result).To(Equal("<|START_THINKING|>Reasoning"))
|
||||
})
|
||||
|
||||
It("should prepend <think>", func() {
|
||||
content := "Reasoning"
|
||||
result := PrependThinkingTokenIfNeeded(content, "<think>")
|
||||
Expect(result).To(Equal("<think>Reasoning"))
|
||||
})
|
||||
|
||||
It("should prepend <thinking>", func() {
|
||||
content := "Reasoning"
|
||||
result := PrependThinkingTokenIfNeeded(content, "<thinking>")
|
||||
Expect(result).To(Equal("<thinking>Reasoning"))
|
||||
})
|
||||
|
||||
It("should prepend [THINK]", func() {
|
||||
content := "Reasoning"
|
||||
result := PrependThinkingTokenIfNeeded(content, "[THINK]")
|
||||
Expect(result).To(Equal("[THINK]Reasoning"))
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -569,7 +569,7 @@ func getIntelGPUTop() []GPUMemoryInfo {
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
xlog.Debug("intel_gpu_top failed", "error", err, "stderr", stderr.String())
|
||||
xlog.Debug("intel_gpu_top failed", "error", err, "stderr", stderr.String(), "stdout", stdout.String())
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user