diff --git a/core/config/gguf.go b/core/config/gguf.go
index f63acd35f..507466d60 100644
--- a/core/config/gguf.go
+++ b/core/config/gguf.go
@@ -62,16 +62,23 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
cfg.NGPULayers = &defaultHigh
}
- xlog.Debug("guessDefaultsFromFile: NGPULayers set", "NGPULayers", cfg.NGPULayers)
+ xlog.Debug("[gguf] guessDefaultsFromFile: NGPULayers set", "NGPULayers", cfg.NGPULayers, "modelName", f.Metadata().Name)
+
+ // identify from well known templates first, otherwise use the raw jinja template
+ chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
+ if found {
+ // fill jinja template
+ cfg.modelTemplate = chatTemplate.ValueString()
+ }
// template estimations
if cfg.HasTemplate() {
// nothing to guess here
- xlog.Debug("guessDefaultsFromFile: template already set", "name", cfg.Name)
+ xlog.Debug("[gguf] guessDefaultsFromFile: template already set", "name", cfg.Name, "modelName", f.Metadata().Name)
return
}
- xlog.Debug("Model file loaded", "file", cfg.ModelFileName(), "eosTokenID", f.Tokenizer().EOSTokenID, "bosTokenID", f.Tokenizer().BOSTokenID, "modelName", f.Metadata().Name, "architecture", f.Architecture().Architecture)
+ xlog.Debug("[gguf] Model file loaded", "file", cfg.ModelFileName(), "eosTokenID", f.Tokenizer().EOSTokenID, "bosTokenID", f.Tokenizer().BOSTokenID, "modelName", f.Metadata().Name, "architecture", f.Architecture().Architecture)
// guess the name
if cfg.Name == "" {
@@ -83,4 +90,5 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
cfg.FunctionsConfig.GrammarConfig.NoGrammar = true
cfg.Options = append(cfg.Options, "use_jinja:true")
cfg.KnownUsecaseStrings = append(cfg.KnownUsecaseStrings, "FLAG_CHAT")
+
}
diff --git a/core/config/model_config.go b/core/config/model_config.go
index 9010c84e6..6841abf57 100644
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -10,6 +10,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
+ "github.com/mudler/LocalAI/pkg/reasoning"
"github.com/mudler/cogito"
"gopkg.in/yaml.v3"
)
@@ -30,6 +31,7 @@ type TTSConfig struct {
// @Description ModelConfig represents a model configuration
type ModelConfig struct {
modelConfigFile string `yaml:"-" json:"-"`
+ modelTemplate string `yaml:"-" json:"-"`
schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"`
Name string `yaml:"name,omitempty" json:"name,omitempty"`
@@ -51,6 +53,7 @@ type ModelConfig struct {
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
+ ReasoningConfig reasoning.Config `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
@@ -521,6 +524,11 @@ func (c *ModelConfig) GetModelConfigFile() string {
return c.modelConfigFile
}
+// GetModelTemplate returns the raw chat template recorded on this config (see modelTemplate), or "" when none was set.
+func (c *ModelConfig) GetModelTemplate() string {
+	return c.modelTemplate
+}
+
type ModelConfigUsecase int
const (
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 4ece68d5c..2075a0368 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -13,6 +13,7 @@ import (
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
+ reason "github.com/mudler/LocalAI/pkg/reasoning"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model"
@@ -38,6 +39,16 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
responses <- initialMessage
+ // Detect if thinking token is already in prompt or template
+ // When UseTokenizerTemplate is enabled, predInput is empty, so we check the template
+ var template string
+ if config.TemplateConfig.UseTokenizerTemplate {
+ template = config.GetModelTemplate()
+ } else {
+ template = s
+ }
+ thinkingStartToken := reason.DetectThinkingStartToken(template)
+
// Track accumulated content for reasoning extraction
accumulatedContent := ""
lastEmittedReasoning := ""
@@ -45,8 +56,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
accumulatedContent += s
- // Extract reasoning from accumulated content
- currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
+ content := accumulatedContent
+ // Prepend thinking token if needed, then extract reasoning
+ if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
+ content = reason.PrependThinkingTokenIfNeeded(content, thinkingStartToken)
+ }
+ currentReasoning, cleanedContent := reason.ExtractReasoning(content)
// Calculate new reasoning delta (what we haven't emitted yet)
var reasoningDelta *string
@@ -118,6 +133,15 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
return err
}
processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.ModelConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) error {
+ // Detect if thinking token is already in prompt or template
+ var template string
+ if config.TemplateConfig.UseTokenizerTemplate {
+ template = config.GetModelTemplate()
+ } else {
+ template = prompt
+ }
+ thinkingStartToken := reason.DetectThinkingStartToken(template)
+
result := ""
lastEmittedCount := 0
_, tokenUsage, err := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
@@ -229,8 +253,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
if err != nil {
return err
}
- // Extract reasoning before processing tool calls
- reasoning, cleanedResult := functions.ExtractReasoning(result)
+ // Prepend thinking token if needed, then extract reasoning before processing tool calls
+ resultWithToken := result
+ if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
+ resultWithToken = reason.PrependThinkingTokenIfNeeded(result, thinkingStartToken)
+ }
+ reasoning, cleanedResult := reason.ExtractReasoning(resultWithToken)
result = cleanedResult
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
@@ -617,10 +645,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
// no streaming mode
default:
+ // Detect if thinking token is already in prompt or template
+ var template string
+ if config.TemplateConfig.UseTokenizerTemplate {
+ template = config.GetModelTemplate() // TODO: this should be the parsed jinja template. But for now this is the best we can do.
+ } else {
+ template = predInput
+ }
+ thinkingStartToken := reason.DetectThinkingStartToken(template)
+
+ xlog.Debug("Thinking start token", "thinkingStartToken", thinkingStartToken, "template", template)
tokenCallback := func(s string, c *[]schema.Choice) {
- // Extract reasoning from the response
- reasoning, cleanedS := functions.ExtractReasoning(s)
+ // Prepend thinking token if needed, then extract reasoning from the response
+ sWithToken := s
+ if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
+ sWithToken = reason.PrependThinkingTokenIfNeeded(s, thinkingStartToken)
+ }
+ reasoning, cleanedS := reason.ExtractReasoning(sWithToken)
s = cleanedS
if !shouldUseFn {
diff --git a/pkg/functions/reasoning.go b/pkg/functions/reasoning.go
deleted file mode 100644
index d3cf05808..000000000
--- a/pkg/functions/reasoning.go
+++ /dev/null
@@ -1,114 +0,0 @@
-package functions
-
-import (
- "strings"
-)
-
-// ExtractReasoning extracts reasoning content from thinking tags and returns
-// both the extracted reasoning and the cleaned content (with tags removed).
-// It handles ... and ... tags.
-// Multiple reasoning blocks are concatenated with newlines.
-func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
- if content == "" {
- return "", content
- }
-
- var reasoningParts []string
- var cleanedParts []string
- remaining := content
-
- // Define tag pairs to look for
- tagPairs := []struct {
- start string
- end string
- }{
- {"", ""},
- {"", ""},
- }
-
- // Track the last position we've processed
- lastPos := 0
-
- for {
- // Find the earliest tag start
- earliestStart := -1
- earliestEnd := -1
- isUnclosed := false
- var matchedTag struct {
- start string
- end string
- }
-
- for _, tagPair := range tagPairs {
- startIdx := strings.Index(remaining[lastPos:], tagPair.start)
- if startIdx == -1 {
- continue
- }
- startIdx += lastPos
-
- // Find the corresponding end tag
- endIdx := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end)
- if endIdx == -1 {
- // Unclosed tag - extract what we have
- if earliestStart == -1 || startIdx < earliestStart {
- earliestStart = startIdx
- earliestEnd = len(remaining)
- isUnclosed = true
- matchedTag = tagPair
- }
- continue
- }
- endIdx += startIdx + len(tagPair.start)
-
- // Found a complete tag pair
- if earliestStart == -1 || startIdx < earliestStart {
- earliestStart = startIdx
- earliestEnd = endIdx + len(tagPair.end)
- isUnclosed = false
- matchedTag = tagPair
- }
- }
-
- if earliestStart == -1 {
- // No more tags found, add remaining content
- if lastPos < len(remaining) {
- cleanedParts = append(cleanedParts, remaining[lastPos:])
- }
- break
- }
-
- // Add content before the tag
- if earliestStart > lastPos {
- cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart])
- }
-
- // Extract reasoning content
- reasoningStart := earliestStart + len(matchedTag.start)
- // For unclosed tags, earliestEnd is already at the end of the string
- // For closed tags, earliestEnd points to after the closing tag, so we subtract the end tag length
- var reasoningEnd int
- if isUnclosed {
- // Unclosed tag - extract everything to the end
- reasoningEnd = len(remaining)
- } else {
- // Closed tag - exclude the end tag
- reasoningEnd = earliestEnd - len(matchedTag.end)
- }
- if reasoningEnd > reasoningStart {
- reasoningContent := strings.TrimSpace(remaining[reasoningStart:reasoningEnd])
- if reasoningContent != "" {
- reasoningParts = append(reasoningParts, reasoningContent)
- }
- }
-
- // Move past this tag
- lastPos = earliestEnd
- }
-
- // Combine reasoning parts
- reasoning = strings.Join(reasoningParts, "\n\n")
- // Combine cleaned content parts
- cleanedContent = strings.Join(cleanedParts, "")
-
- return reasoning, cleanedContent
-}
diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go
index 95d540b45..090488a06 100644
--- a/pkg/model/watchdog.go
+++ b/pkg/model/watchdog.go
@@ -393,7 +393,7 @@ func (wd *WatchDog) checkMemory() {
memoryType = "RAM"
}
- xlog.Debug("[WatchDog] Memory check", "type", memoryType, "usage_percent", aggregate.UsagePercent, "threshold_percent", thresholdPercent, "loaded_models", modelCount)
+ //xlog.Debug("[WatchDog] Memory check", "type", memoryType, "usage_percent", aggregate.UsagePercent, "threshold_percent", thresholdPercent, "loaded_models", modelCount)
// Check if usage exceeds threshold
if aggregate.UsagePercent > thresholdPercent {
diff --git a/pkg/reasoning/config.go b/pkg/reasoning/config.go
new file mode 100644
index 000000000..0fc23cc19
--- /dev/null
+++ b/pkg/reasoning/config.go
@@ -0,0 +1,10 @@
+// Package reasoning detects thinking tokens in prompts and extracts reasoning blocks from model output.
+package reasoning
+
+// Config holds the per-model settings that control reasoning handling.
+type Config struct {
+	// DisableReasoningTagPrefill, when set to true, tells callers not to prepend the
+	// thinking start token detected in the prompt or template to the model output
+	// before reasoning extraction. A nil value is treated the same as false.
+	DisableReasoningTagPrefill *bool `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"`
+}
diff --git a/pkg/reasoning/reasoning.go b/pkg/reasoning/reasoning.go
new file mode 100644
index 000000000..6add81e75
--- /dev/null
+++ b/pkg/reasoning/reasoning.go
@@ -0,0 +1,136 @@
+package reasoning
+
+import (
+	"strings"
+)
+
+// thinkingTag is one pair of markers that delimit a reasoning block in model output.
+type thinkingTag struct {
+	start string
+	end   string
+}
+
+// thinkingTags lists the reasoning markers recognised by this package, based on
+// llama.cpp's chat-parser.cpp. DetectThinkingStartToken scans it in order and
+// ExtractReasoning uses it to strip reasoning blocks, so both always agree on
+// the supported set.
+var thinkingTags = []thinkingTag{
+	{"<|START_THINKING|>", "<|END_THINKING|>"},            // Command-R models
+	{"<|inner_prefix|>", "<|inner_suffix|>"},              // Apertus models
+	{"<seed:think>", "</seed:think>"},                     // Seed models
+	{"<think>", "</think>"},                               // DeepSeek, Granite, ExaOne, GLM models
+	{"<|think|>", "<|end|><|begin|>assistant<|content|>"}, // Solar Open models (complex end)
+	{"<thinking>", "</thinking>"},                         // General thinking tag
+	{"[THINK]", "[/THINK]"},                               // Magistral models
+}
+
+// DetectThinkingStartToken reports which thinking start token, if any, occurs in
+// the given prompt or chat template.
+//
+// A match means the prompt already opens the reasoning block, so the model output
+// will start with reasoning content without an explicit opening tag.
+//
+// The token may appear anywhere in the input, not only at its end: callers may
+// pass a raw chat template, where the token sits in the middle of the text.
+// Tokens are tried in thinkingTags order and the first one found is returned;
+// the empty string is returned when none is present.
+func DetectThinkingStartToken(prompt string) string {
+	for _, tag := range thinkingTags {
+		if strings.Contains(prompt, tag.start) {
+			return tag.start
+		}
+	}
+	return ""
+}
+
+// PrependThinkingTokenIfNeeded inserts startToken in front of content so that
+// ExtractReasoning can treat output whose opening tag lives in the prompt like
+// normally tagged output.
+//
+// content is returned unchanged when startToken is empty or already occurs
+// anywhere in content. Otherwise the token is placed after any leading
+// whitespace (spaces, tabs, newlines, carriage returns), which is preserved.
+func PrependThinkingTokenIfNeeded(content string, startToken string) string {
+	if startToken == "" {
+		return content
+	}
+
+	body := strings.TrimLeft(content, " \t\n\r")
+	if strings.Contains(body, startToken) {
+		// The token is already part of the output; do not add a second one.
+		return content
+	}
+
+	leading := content[:len(content)-len(body)]
+	return leading + startToken + body
+}
+
+// ExtractReasoning extracts reasoning content from thinking tags and returns
+// both the extracted reasoning and the cleaned content (with the tagged blocks
+// removed). Every tag pair in thinkingTags is recognised.
+//
+// A block whose closing tag is missing extends to the end of content, so the
+// function also works on output that is still being streamed. Each block is
+// whitespace-trimmed and empty blocks are dropped; multiple blocks are joined with a blank line.
+func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
+	if content == "" {
+		return "", content
+	}
+
+	var reasoningParts []string
+	var cleanedParts []string
+
+	// pos is the offset up to which content has already been consumed.
+	pos := 0
+	for {
+		// Pick the tag whose start marker occurs earliest at or after pos.
+		// On a tie the entry listed first in thinkingTags wins.
+		matchStart := -1
+		var matched thinkingTag
+		for _, tag := range thinkingTags {
+			idx := strings.Index(content[pos:], tag.start)
+			if idx == -1 {
+				continue
+			}
+			idx += pos
+			if matchStart == -1 || idx < matchStart {
+				matchStart = idx
+				matched = tag
+			}
+		}
+
+		if matchStart == -1 {
+			// No more tags: whatever is left is plain content.
+			if pos < len(content) {
+				cleanedParts = append(cleanedParts, content[pos:])
+			}
+			break
+		}
+
+		// Keep the plain text that precedes the tag.
+		if matchStart > pos {
+			cleanedParts = append(cleanedParts, content[pos:matchStart])
+		}
+
+		// Default to the unclosed case: the block runs to the end of content.
+		bodyStart := matchStart + len(matched.start)
+		bodyEnd := len(content)
+		next := len(content)
+		if rel := strings.Index(content[bodyStart:], matched.end); rel != -1 {
+			bodyEnd = bodyStart + rel
+			next = bodyEnd + len(matched.end)
+		}
+
+		if text := strings.TrimSpace(content[bodyStart:bodyEnd]); text != "" {
+			reasoningParts = append(reasoningParts, text)
+		}
+
+		// Resume scanning after the block (or stop, if it was unclosed).
+		pos = next
+	}
+
+	reasoning = strings.Join(reasoningParts, "\n\n")
+	cleanedContent = strings.Join(cleanedParts, "")
+
+	return reasoning, cleanedContent
+}
diff --git a/pkg/reasoning/reasoning_suite_test.go b/pkg/reasoning/reasoning_suite_test.go
new file mode 100644
index 000000000..a1e03ab0a
--- /dev/null
+++ b/pkg/reasoning/reasoning_suite_test.go
@@ -0,0 +1,14 @@
+package reasoning_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// TestReasoning registers Gomega's fail handler and runs the "Reasoning test suite" Ginkgo specs under go test.
+func TestReasoning(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "Reasoning test suite")
+}
diff --git a/pkg/functions/reasoning_test.go b/pkg/reasoning/reasoning_test.go
similarity index 53%
rename from pkg/functions/reasoning_test.go
rename to pkg/reasoning/reasoning_test.go
index 3f7d07541..f66eca55e 100644
--- a/pkg/functions/reasoning_test.go
+++ b/pkg/reasoning/reasoning_test.go
@@ -1,9 +1,9 @@
-package functions_test
+package reasoning_test
import (
"strings"
- . "github.com/mudler/LocalAI/pkg/functions"
+ . "github.com/mudler/LocalAI/pkg/reasoning"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
@@ -258,4 +258,249 @@ var _ = Describe("ExtractReasoning", func() {
Expect(cleaned).To(Equal("Text More"))
})
})
+
+	Context("when content has <|START_THINKING|> tags (Command-R)", func() {
+		It("should extract reasoning from START_THINKING block", func() {
+			content := "Text <|START_THINKING|>Command-R reasoning<|END_THINKING|> More"
+			reasoning, cleaned := ExtractReasoning(content)
+			Expect(reasoning).To(Equal("Command-R reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle unclosed START_THINKING block", func() {
+			content := "Before <|START_THINKING|>Incomplete reasoning"
+			reasoning, cleaned := ExtractReasoning(content)
+			Expect(reasoning).To(Equal("Incomplete reasoning"))
+			Expect(cleaned).To(Equal("Before "))
+		})
+	})
+
+	Context("when content has <|inner_prefix|> tags (Apertus)", func() {
+		It("should extract reasoning from inner_prefix block", func() {
+			content := "Text <|inner_prefix|>Apertus reasoning<|inner_suffix|> More"
+			reasoning, cleaned := ExtractReasoning(content)
+			Expect(reasoning).To(Equal("Apertus reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+	})
+
+	Context("when content has <seed:think> tags (Seed)", func() {
+		It("should extract reasoning from seed:think block", func() {
+			content := "Text <seed:think>Seed reasoning</seed:think> More"
+			reasoning, cleaned := ExtractReasoning(content)
+			Expect(reasoning).To(Equal("Seed reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+	})
+
+	Context("when content has <|think|> tags (Solar Open)", func() {
+		It("should extract reasoning from Solar Open think block", func() {
+			content := "Text <|think|>Solar reasoning<|end|><|begin|>assistant<|content|> More"
+			reasoning, cleaned := ExtractReasoning(content)
+			Expect(reasoning).To(Equal("Solar reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+	})
+
+	Context("when content has [THINK] tags (Magistral)", func() {
+		It("should extract reasoning from THINK block", func() {
+			content := "Text [THINK]Magistral reasoning[/THINK] More"
+			reasoning, cleaned := ExtractReasoning(content)
+			Expect(reasoning).To(Equal("Magistral reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle unclosed THINK block", func() {
+			content := "Before [THINK]Incomplete reasoning"
+			reasoning, cleaned := ExtractReasoning(content)
+			Expect(reasoning).To(Equal("Incomplete reasoning"))
+			Expect(cleaned).To(Equal("Before "))
+		})
+	})
+})
+
+var _ = Describe("DetectThinkingStartToken", func() {
+	Context("when prompt contains thinking start tokens", func() {
+		It("should detect <|START_THINKING|> at the end", func() {
+			prompt := "Some prompt text <|START_THINKING|>"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<|START_THINKING|>"))
+		})
+
+		It("should detect <think> at the end", func() {
+			prompt := "Prompt with <think>"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<think>"))
+		})
+
+		It("should detect <thinking> at the end", func() {
+			prompt := "Some text <thinking>"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<thinking>"))
+		})
+
+		It("should detect <|inner_prefix|> at the end", func() {
+			prompt := "Prompt <|inner_prefix|>"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<|inner_prefix|>"))
+		})
+
+		It("should detect <seed:think> at the end", func() {
+			prompt := "Text <seed:think>"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<seed:think>"))
+		})
+
+		It("should detect <|think|> at the end", func() {
+			prompt := "Prompt <|think|>"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<|think|>"))
+		})
+
+		It("should detect [THINK] at the end", func() {
+			prompt := "Text [THINK]"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("[THINK]"))
+		})
+
+		It("should handle trailing whitespace", func() {
+			prompt := "Prompt <|START_THINKING|> \n\t "
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<|START_THINKING|>"))
+		})
+
+		It("should detect token near the end (within last 100 chars)", func() {
+			prefix := strings.Repeat("x", 50)
+			prompt := prefix + "<|START_THINKING|>"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<|START_THINKING|>"))
+		})
+
+		It("should detect token when followed by only whitespace", func() {
+			prompt := "Text <think>\n "
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(Equal("<think>"))
+		})
+	})
+
+	Context("when prompt does not contain thinking tokens", func() {
+		It("should return empty string for regular prompt", func() {
+			prompt := "This is a regular prompt without thinking tokens"
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(BeEmpty())
+		})
+
+		It("should return empty string for empty prompt", func() {
+			prompt := ""
+			token := DetectThinkingStartToken(prompt)
+			Expect(token).To(BeEmpty())
+		})
+
+		It("should detect token even when far from end (Contains check)", func() {
+			prefix := strings.Repeat("x", 150)
+			prompt := prefix + "<|START_THINKING|>"
+			token := DetectThinkingStartToken(prompt)
+			// Current implementation uses Contains, so it finds tokens anywhere
+			Expect(token).To(Equal("<|START_THINKING|>"))
+		})
+
+		It("should detect token even when followed by non-whitespace (Contains check)", func() {
+			prompt := "Text <|START_THINKING|>more text"
+			token := DetectThinkingStartToken(prompt)
+			// Current implementation uses Contains, so it finds tokens anywhere
+			Expect(token).To(Equal("<|START_THINKING|>"))
+		})
+	})
+
+	Context("when multiple tokens are present", func() {
+		It("should return the first matching token (most specific)", func() {
+			prompt := "Text <|START_THINKING|> <thinking>"
+			token := DetectThinkingStartToken(prompt)
+			// Should return the first one found (order matters)
+			Expect(token).To(Equal("<|START_THINKING|>"))
+		})
+	})
+})
+
+var _ = Describe("PrependThinkingTokenIfNeeded", func() {
+	Context("when startToken is empty", func() {
+		It("should return content unchanged", func() {
+			content := "Some content"
+			result := PrependThinkingTokenIfNeeded(content, "")
+			Expect(result).To(Equal(content))
+		})
+	})
+
+	Context("when content already starts with token", func() {
+		It("should not prepend if content starts with token", func() {
+			content := "<|START_THINKING|>Reasoning content"
+			result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
+			Expect(result).To(Equal(content))
+		})
+
+		It("should not prepend if content starts with token after whitespace", func() {
+			content := "  <think>Reasoning"
+			result := PrependThinkingTokenIfNeeded(content, "<think>")
+			Expect(result).To(Equal(content))
+		})
+
+		It("should not prepend if token appears anywhere in content", func() {
+			content := "Some text <think>Reasoning"
+			result := PrependThinkingTokenIfNeeded(content, "<think>")
+			// With Contains check, it should not prepend
+			Expect(result).To(Equal(content))
+		})
+	})
+
+	Context("when content does not contain token", func() {
+		It("should prepend token to content", func() {
+			content := "Reasoning content"
+			result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
+			Expect(result).To(Equal("<|START_THINKING|>Reasoning content"))
+		})
+
+		It("should prepend token after leading whitespace", func() {
+			content := " \n Reasoning content"
+			result := PrependThinkingTokenIfNeeded(content, "<think>")
+			Expect(result).To(Equal(" \n <think>Reasoning content"))
+		})
+
+		It("should handle empty content", func() {
+			content := ""
+			result := PrependThinkingTokenIfNeeded(content, "<think>")
+			Expect(result).To(Equal("<think>"))
+		})
+
+		It("should handle content with only whitespace", func() {
+			content := " \n\t "
+			result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
+			Expect(result).To(Equal(" \n\t <|START_THINKING|>"))
+		})
+	})
+
+	Context("with different token types", func() {
+		It("should prepend <|START_THINKING|>", func() {
+			content := "Reasoning"
+			result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>")
+			Expect(result).To(Equal("<|START_THINKING|>Reasoning"))
+		})
+
+		It("should prepend <think>", func() {
+			content := "Reasoning"
+			result := PrependThinkingTokenIfNeeded(content, "<think>")
+			Expect(result).To(Equal("<think>Reasoning"))
+		})
+
+		It("should prepend <thinking>", func() {
+			content := "Reasoning"
+			result := PrependThinkingTokenIfNeeded(content, "<thinking>")
+			Expect(result).To(Equal("<thinking>Reasoning"))
+		})
+
+		It("should prepend [THINK]", func() {
+			content := "Reasoning"
+			result := PrependThinkingTokenIfNeeded(content, "[THINK]")
+			Expect(result).To(Equal("[THINK]Reasoning"))
+		})
+	})
 })
diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go
index dcda6c4e6..e4d7b921d 100644
--- a/pkg/xsysinfo/gpu.go
+++ b/pkg/xsysinfo/gpu.go
@@ -569,7 +569,7 @@ func getIntelGPUTop() []GPUMemoryInfo {
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
- xlog.Debug("intel_gpu_top failed", "error", err, "stderr", stderr.String())
+ xlog.Debug("intel_gpu_top failed", "error", err, "stderr", stderr.String(), "stdout", stdout.String())
return nil
}