diff --git a/core/config/gguf.go b/core/config/gguf.go index f63acd35f..507466d60 100644 --- a/core/config/gguf.go +++ b/core/config/gguf.go @@ -62,16 +62,23 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) { cfg.NGPULayers = &defaultHigh } - xlog.Debug("guessDefaultsFromFile: NGPULayers set", "NGPULayers", cfg.NGPULayers) + xlog.Debug("[gguf] guessDefaultsFromFile: NGPULayers set", "NGPULayers", cfg.NGPULayers, "modelName", f.Metadata().Name) + + // identify from well known templates first, otherwise use the raw jinja template + chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template") + if found { + // fill jinja template + cfg.modelTemplate = chatTemplate.ValueString() + } // template estimations if cfg.HasTemplate() { // nothing to guess here - xlog.Debug("guessDefaultsFromFile: template already set", "name", cfg.Name) + xlog.Debug("[gguf] guessDefaultsFromFile: template already set", "name", cfg.Name, "modelName", f.Metadata().Name) return } - xlog.Debug("Model file loaded", "file", cfg.ModelFileName(), "eosTokenID", f.Tokenizer().EOSTokenID, "bosTokenID", f.Tokenizer().BOSTokenID, "modelName", f.Metadata().Name, "architecture", f.Architecture().Architecture) + xlog.Debug("[gguf] Model file loaded", "file", cfg.ModelFileName(), "eosTokenID", f.Tokenizer().EOSTokenID, "bosTokenID", f.Tokenizer().BOSTokenID, "modelName", f.Metadata().Name, "architecture", f.Architecture().Architecture) // guess the name if cfg.Name == "" { @@ -83,4 +90,5 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) { cfg.FunctionsConfig.GrammarConfig.NoGrammar = true cfg.Options = append(cfg.Options, "use_jinja:true") cfg.KnownUsecaseStrings = append(cfg.KnownUsecaseStrings, "FLAG_CHAT") + } diff --git a/core/config/model_config.go b/core/config/model_config.go index 9010c84e6..6841abf57 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -10,6 +10,7 @@ import ( 
"github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/reasoning" "github.com/mudler/cogito" "gopkg.in/yaml.v3" ) @@ -30,6 +31,7 @@ type TTSConfig struct { // @Description ModelConfig represents a model configuration type ModelConfig struct { modelConfigFile string `yaml:"-" json:"-"` + modelTemplate string `yaml:"-" json:"-"` schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"` Name string `yaml:"name,omitempty" json:"name,omitempty"` @@ -51,6 +53,7 @@ type ModelConfig struct { ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"` FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"` + ReasoningConfig reasoning.Config `yaml:"reasoning,omitempty" json:"reasoning,omitempty"` FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early. // LLM configs (GPT4ALL, Llama.cpp, ...) 
@@ -521,6 +524,11 @@ func (c *ModelConfig) GetModelConfigFile() string { return c.modelConfigFile } +// GetModelTemplate returns the model's chat template if available +func (c *ModelConfig) GetModelTemplate() string { + return c.modelTemplate +} + type ModelConfigUsecase int const ( diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 4ece68d5c..2075a0368 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -13,6 +13,7 @@ import ( "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" + reason "github.com/mudler/LocalAI/pkg/reasoning" "github.com/mudler/LocalAI/core/templates" "github.com/mudler/LocalAI/pkg/model" @@ -38,6 +39,16 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator } responses <- initialMessage + // Detect if thinking token is already in prompt or template + // When UseTokenizerTemplate is enabled, predInput is empty, so we check the template + var template string + if config.TemplateConfig.UseTokenizerTemplate { + template = config.GetModelTemplate() + } else { + template = s + } + thinkingStartToken := reason.DetectThinkingStartToken(template) + // Track accumulated content for reasoning extraction accumulatedContent := "" lastEmittedReasoning := "" @@ -45,8 +56,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator _, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { accumulatedContent += s - // Extract reasoning from accumulated content - currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent) + content := accumulatedContent + // Prepend thinking token if needed, then extract reasoning + if config.ReasoningConfig.DisableReasoningTagPrefill == nil || 
!*config.ReasoningConfig.DisableReasoningTagPrefill { + content = reason.PrependThinkingTokenIfNeeded(content, thinkingStartToken) + } + currentReasoning, cleanedContent := reason.ExtractReasoning(content) // Calculate new reasoning delta (what we haven't emitted yet) var reasoningDelta *string @@ -118,6 +133,15 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator return err } processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.ModelConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) error { + // Detect if thinking token is already in prompt or template + var template string + if config.TemplateConfig.UseTokenizerTemplate { + template = config.GetModelTemplate() + } else { + template = prompt + } + thinkingStartToken := reason.DetectThinkingStartToken(template) + result := "" lastEmittedCount := 0 _, tokenUsage, err := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool { @@ -229,8 +253,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator if err != nil { return err } - // Extract reasoning before processing tool calls - reasoning, cleanedResult := functions.ExtractReasoning(result) + // Prepend thinking token if needed, then extract reasoning before processing tool calls + resultWithToken := result + if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill { + resultWithToken = reason.PrependThinkingTokenIfNeeded(result, thinkingStartToken) + } + reasoning, cleanedResult := reason.ExtractReasoning(resultWithToken) result = cleanedResult textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) @@ -617,10 +645,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator // no streaming mode default: + // Detect if thinking 
token is already in prompt or template + var template string + if config.TemplateConfig.UseTokenizerTemplate { + template = config.GetModelTemplate() // TODO: this should be the parsed jinja template. But for now this is the best we can do. + } else { + template = predInput + } + thinkingStartToken := reason.DetectThinkingStartToken(template) + + xlog.Debug("Thinking start token", "thinkingStartToken", thinkingStartToken, "template", template) tokenCallback := func(s string, c *[]schema.Choice) { - // Extract reasoning from the response - reasoning, cleanedS := functions.ExtractReasoning(s) + // Prepend thinking token if needed, then extract reasoning from the response + sWithToken := s + if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill { + sWithToken = reason.PrependThinkingTokenIfNeeded(s, thinkingStartToken) + } + reasoning, cleanedS := reason.ExtractReasoning(sWithToken) s = cleanedS if !shouldUseFn { diff --git a/pkg/functions/reasoning.go b/pkg/functions/reasoning.go deleted file mode 100644 index d3cf05808..000000000 --- a/pkg/functions/reasoning.go +++ /dev/null @@ -1,114 +0,0 @@ -package functions - -import ( - "strings" -) - -// ExtractReasoning extracts reasoning content from thinking tags and returns -// both the extracted reasoning and the cleaned content (with tags removed). -// It handles ... and ... tags. -// Multiple reasoning blocks are concatenated with newlines. 
-func ExtractReasoning(content string) (reasoning string, cleanedContent string) { - if content == "" { - return "", content - } - - var reasoningParts []string - var cleanedParts []string - remaining := content - - // Define tag pairs to look for - tagPairs := []struct { - start string - end string - }{ - {"", ""}, - {"", ""}, - } - - // Track the last position we've processed - lastPos := 0 - - for { - // Find the earliest tag start - earliestStart := -1 - earliestEnd := -1 - isUnclosed := false - var matchedTag struct { - start string - end string - } - - for _, tagPair := range tagPairs { - startIdx := strings.Index(remaining[lastPos:], tagPair.start) - if startIdx == -1 { - continue - } - startIdx += lastPos - - // Find the corresponding end tag - endIdx := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end) - if endIdx == -1 { - // Unclosed tag - extract what we have - if earliestStart == -1 || startIdx < earliestStart { - earliestStart = startIdx - earliestEnd = len(remaining) - isUnclosed = true - matchedTag = tagPair - } - continue - } - endIdx += startIdx + len(tagPair.start) - - // Found a complete tag pair - if earliestStart == -1 || startIdx < earliestStart { - earliestStart = startIdx - earliestEnd = endIdx + len(tagPair.end) - isUnclosed = false - matchedTag = tagPair - } - } - - if earliestStart == -1 { - // No more tags found, add remaining content - if lastPos < len(remaining) { - cleanedParts = append(cleanedParts, remaining[lastPos:]) - } - break - } - - // Add content before the tag - if earliestStart > lastPos { - cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart]) - } - - // Extract reasoning content - reasoningStart := earliestStart + len(matchedTag.start) - // For unclosed tags, earliestEnd is already at the end of the string - // For closed tags, earliestEnd points to after the closing tag, so we subtract the end tag length - var reasoningEnd int - if isUnclosed { - // Unclosed tag - extract everything to 
the end - reasoningEnd = len(remaining) - } else { - // Closed tag - exclude the end tag - reasoningEnd = earliestEnd - len(matchedTag.end) - } - if reasoningEnd > reasoningStart { - reasoningContent := strings.TrimSpace(remaining[reasoningStart:reasoningEnd]) - if reasoningContent != "" { - reasoningParts = append(reasoningParts, reasoningContent) - } - } - - // Move past this tag - lastPos = earliestEnd - } - - // Combine reasoning parts - reasoning = strings.Join(reasoningParts, "\n\n") - // Combine cleaned content parts - cleanedContent = strings.Join(cleanedParts, "") - - return reasoning, cleanedContent -} diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go index 95d540b45..090488a06 100644 --- a/pkg/model/watchdog.go +++ b/pkg/model/watchdog.go @@ -393,7 +393,7 @@ func (wd *WatchDog) checkMemory() { memoryType = "RAM" } - xlog.Debug("[WatchDog] Memory check", "type", memoryType, "usage_percent", aggregate.UsagePercent, "threshold_percent", thresholdPercent, "loaded_models", modelCount) + //xlog.Debug("[WatchDog] Memory check", "type", memoryType, "usage_percent", aggregate.UsagePercent, "threshold_percent", thresholdPercent, "loaded_models", modelCount) // Check if usage exceeds threshold if aggregate.UsagePercent > thresholdPercent { diff --git a/pkg/reasoning/config.go b/pkg/reasoning/config.go new file mode 100644 index 000000000..0fc23cc19 --- /dev/null +++ b/pkg/reasoning/config.go @@ -0,0 +1,5 @@ +package reasoning + +type Config struct { + DisableReasoningTagPrefill *bool `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"` +} diff --git a/pkg/reasoning/reasoning.go b/pkg/reasoning/reasoning.go new file mode 100644 index 000000000..6add81e75 --- /dev/null +++ b/pkg/reasoning/reasoning.go @@ -0,0 +1,208 @@ +package reasoning + +import ( + "strings" +) + +// DetectThinkingStartToken checks if the prompt or template contains a thinking start token +// and returns the detected token. 
// DetectThinkingStartToken reports which reasoning-opening tag, if any, occurs
// in prompt. The argument is either the rendered prompt or the model's raw chat
// template. A hit means the template already opens the reasoning block, so the
// model output is expected to begin with reasoning text without an explicit
// opening tag.
//
// The match is a plain substring search over the whole input: a raw template
// mentions the tag somewhere in its body rather than at its very end. When
// several tags are present, the one listed first below wins.
//
// It returns the matched tag, or an empty string when none is present.
func DetectThinkingStartToken(prompt string) string {
	// Known reasoning-opening tags, in priority order.
	// NOTE(review): "<seed:think>", "<think>" and "<thinking>" were
	// reconstructed from the model names in the accompanying comments; confirm
	// the exact spellings against llama.cpp's chat-parser.cpp.
	thinkingStartTokens := []string{
		"<|START_THINKING|>", // Command-R models
		"<|inner_prefix|>",   // Apertus models
		"<seed:think>",       // Seed models
		"<think>",            // DeepSeek, Granite, ExaOne, GLM models
		"<|think|>",          // Solar Open models
		"<thinking>",         // General thinking tag
		"[THINK]",            // Magistral models
	}

	// None of the tags contains whitespace, so trailing whitespace in prompt
	// cannot affect the result and no trimming is needed.
	for _, token := range thinkingStartTokens {
		if strings.Contains(prompt, token) {
			return token
		}
	}

	return ""
}
// PrependThinkingTokenIfNeeded re-inserts startToken in front of content so
// that tag-based reasoning extraction can treat the output like ordinary
// tagged text. It is meant for models whose prompt already contains the
// opening tag and whose output therefore starts directly with reasoning.
//
// content is returned unchanged when startToken is empty, or when startToken
// occurs anywhere in content after its leading whitespace. Otherwise the token
// is inserted right after any leading spaces, tabs, newlines or carriage
// returns.
func PrependThinkingTokenIfNeeded(content string, startToken string) string {
	// No tag was detected in the prompt, so there is nothing to restore.
	if startToken == "" {
		return content
	}

	// Split the output into its leading ASCII whitespace and the remainder.
	body := strings.TrimLeft(content, " \t\n\r")
	indent := content[:len(content)-len(body)]

	// The model emitted the tag itself; adding another would open a second block.
	if strings.Contains(body, startToken) {
		return content
	}

	return indent + startToken + body
}
// ExtractReasoning splits content into the text found inside reasoning tags
// and the text outside them.
//
// It scans left to right; at each step the reasoning block whose opening tag
// appears first is consumed. An opening tag with no matching closing tag
// swallows the rest of the input as reasoning. Each block is trimmed of
// surrounding whitespace, empty blocks are dropped, and the remaining blocks
// are joined with a blank line ("\n\n"). The text outside the blocks is
// concatenated as-is, without trimming, to form cleanedContent.
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
	if content == "" {
		return "", content
	}

	type tagPair struct {
		start string
		end   string
	}

	// Tag pairs to look for (matching llama.cpp's chat-parser.cpp). Every tag
	// must be non-empty: an empty start tag would match at the current
	// position with zero length and the scan below would never advance.
	// NOTE(review): the seed:think / think / thinking spellings were
	// reconstructed from the model names in the comments; confirm against
	// llama.cpp's chat-parser.cpp.
	tagPairs := []tagPair{
		{"<|START_THINKING|>", "<|END_THINKING|>"},            // Command-R models
		{"<|inner_prefix|>", "<|inner_suffix|>"},              // Apertus models
		{"<seed:think>", "</seed:think>"},                     // Seed models
		{"<think>", "</think>"},                               // DeepSeek, Granite, ExaOne models
		{"<|think|>", "<|end|><|begin|>assistant<|content|>"}, // Solar Open models (complex end)
		{"<thinking>", "</thinking>"},                         // General thinking tag
		{"[THINK]", "[/THINK]"},                               // Magistral models
	}

	var reasoningParts []string
	var cleanedParts []string

	// lastPos is the offset up to which content has already been consumed.
	lastPos := 0

	for {
		// Locate the reasoning block that opens earliest at or after lastPos.
		earliestStart := -1
		reasoningStart, reasoningEnd, nextPos := 0, 0, 0

		for _, pair := range tagPairs {
			rel := strings.Index(content[lastPos:], pair.start)
			if rel == -1 {
				continue
			}
			start := lastPos + rel
			if earliestStart != -1 && start >= earliestStart {
				// An earlier-listed pair already opens at or before this one.
				continue
			}

			earliestStart = start
			reasoningStart = start + len(pair.start)
			if endRel := strings.Index(content[reasoningStart:], pair.end); endRel == -1 {
				// Unclosed tag: everything up to the end of content is reasoning.
				reasoningEnd = len(content)
				nextPos = len(content)
			} else {
				reasoningEnd = reasoningStart + endRel
				nextPos = reasoningEnd + len(pair.end)
			}
		}

		if earliestStart == -1 {
			// No more tags: whatever is left is regular content.
			if lastPos < len(content) {
				cleanedParts = append(cleanedParts, content[lastPos:])
			}
			break
		}

		// Keep the text between the previous block and this one.
		if earliestStart > lastPos {
			cleanedParts = append(cleanedParts, content[lastPos:earliestStart])
		}

		if reasoningEnd > reasoningStart {
			if text := strings.TrimSpace(content[reasoningStart:reasoningEnd]); text != "" {
				reasoningParts = append(reasoningParts, text)
			}
		}

		// Continue after the closing tag, or at the end for an unclosed block.
		lastPos = nextPos
	}

	reasoning = strings.Join(reasoningParts, "\n\n")
	cleanedContent = strings.Join(cleanedParts, "")

	return reasoning, cleanedContent
}
"github.com/onsi/gomega" ) @@ -258,4 +258,249 @@ var _ = Describe("ExtractReasoning", func() { Expect(cleaned).To(Equal("Text More")) }) }) + + Context("when content has <|START_THINKING|> tags (Command-R)", func() { + It("should extract reasoning from START_THINKING block", func() { + content := "Text <|START_THINKING|>Command-R reasoning<|END_THINKING|> More" + reasoning, cleaned := ExtractReasoning(content) + Expect(reasoning).To(Equal("Command-R reasoning")) + Expect(cleaned).To(Equal("Text More")) + }) + + It("should handle unclosed START_THINKING block", func() { + content := "Before <|START_THINKING|>Incomplete reasoning" + reasoning, cleaned := ExtractReasoning(content) + Expect(reasoning).To(Equal("Incomplete reasoning")) + Expect(cleaned).To(Equal("Before ")) + }) + }) + + Context("when content has <|inner_prefix|> tags (Apertus)", func() { + It("should extract reasoning from inner_prefix block", func() { + content := "Text <|inner_prefix|>Apertus reasoning<|inner_suffix|> More" + reasoning, cleaned := ExtractReasoning(content) + Expect(reasoning).To(Equal("Apertus reasoning")) + Expect(cleaned).To(Equal("Text More")) + }) + }) + + Context("when content has tags (Seed)", func() { + It("should extract reasoning from seed:think block", func() { + content := "Text Seed reasoning More" + reasoning, cleaned := ExtractReasoning(content) + Expect(reasoning).To(Equal("Seed reasoning")) + Expect(cleaned).To(Equal("Text More")) + }) + }) + + Context("when content has <|think|> tags (Solar Open)", func() { + It("should extract reasoning from Solar Open think block", func() { + content := "Text <|think|>Solar reasoning<|end|><|begin|>assistant<|content|> More" + reasoning, cleaned := ExtractReasoning(content) + Expect(reasoning).To(Equal("Solar reasoning")) + Expect(cleaned).To(Equal("Text More")) + }) + }) + + Context("when content has [THINK] tags (Magistral)", func() { + It("should extract reasoning from THINK block", func() { + content := "Text [THINK]Magistral 
reasoning[/THINK] More" + reasoning, cleaned := ExtractReasoning(content) + Expect(reasoning).To(Equal("Magistral reasoning")) + Expect(cleaned).To(Equal("Text More")) + }) + + It("should handle unclosed THINK block", func() { + content := "Before [THINK]Incomplete reasoning" + reasoning, cleaned := ExtractReasoning(content) + Expect(reasoning).To(Equal("Incomplete reasoning")) + Expect(cleaned).To(Equal("Before ")) + }) + }) +}) + +var _ = Describe("DetectThinkingStartToken", func() { + Context("when prompt contains thinking start tokens", func() { + It("should detect <|START_THINKING|> at the end", func() { + prompt := "Some prompt text <|START_THINKING|>" + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("<|START_THINKING|>")) + }) + + It("should detect at the end", func() { + prompt := "Prompt with " + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("")) + }) + + It("should detect at the end", func() { + prompt := "Some text " + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("")) + }) + + It("should detect <|inner_prefix|> at the end", func() { + prompt := "Prompt <|inner_prefix|>" + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("<|inner_prefix|>")) + }) + + It("should detect at the end", func() { + prompt := "Text " + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("")) + }) + + It("should detect <|think|> at the end", func() { + prompt := "Prompt <|think|>" + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("<|think|>")) + }) + + It("should detect [THINK] at the end", func() { + prompt := "Text [THINK]" + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("[THINK]")) + }) + + It("should handle trailing whitespace", func() { + prompt := "Prompt <|START_THINKING|> \n\t " + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("<|START_THINKING|>")) + }) + + It("should detect token near the end (within last 100 chars)", 
func() { + prefix := strings.Repeat("x", 50) + prompt := prefix + "<|START_THINKING|>" + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("<|START_THINKING|>")) + }) + + It("should detect token when followed by only whitespace", func() { + prompt := "Text \n " + token := DetectThinkingStartToken(prompt) + Expect(token).To(Equal("")) + }) + }) + + Context("when prompt does not contain thinking tokens", func() { + It("should return empty string for regular prompt", func() { + prompt := "This is a regular prompt without thinking tokens" + token := DetectThinkingStartToken(prompt) + Expect(token).To(BeEmpty()) + }) + + It("should return empty string for empty prompt", func() { + prompt := "" + token := DetectThinkingStartToken(prompt) + Expect(token).To(BeEmpty()) + }) + + It("should detect token even when far from end (Contains check)", func() { + prefix := strings.Repeat("x", 150) + prompt := prefix + "<|START_THINKING|>" + token := DetectThinkingStartToken(prompt) + // Current implementation uses Contains, so it finds tokens anywhere + Expect(token).To(Equal("<|START_THINKING|>")) + }) + + It("should detect token even when followed by non-whitespace (Contains check)", func() { + prompt := "Text <|START_THINKING|>more text" + token := DetectThinkingStartToken(prompt) + // Current implementation uses Contains, so it finds tokens anywhere + Expect(token).To(Equal("<|START_THINKING|>")) + }) + }) + + Context("when multiple tokens are present", func() { + It("should return the first matching token (most specific)", func() { + prompt := "Text <|START_THINKING|> " + token := DetectThinkingStartToken(prompt) + // Should return the first one found (order matters) + Expect(token).To(Equal("<|START_THINKING|>")) + }) + }) +}) + +var _ = Describe("PrependThinkingTokenIfNeeded", func() { + Context("when startToken is empty", func() { + It("should return content unchanged", func() { + content := "Some content" + result := PrependThinkingTokenIfNeeded(content, 
"") + Expect(result).To(Equal(content)) + }) + }) + + Context("when content already starts with token", func() { + It("should not prepend if content starts with token", func() { + content := "<|START_THINKING|>Reasoning content" + result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>") + Expect(result).To(Equal(content)) + }) + + It("should not prepend if content starts with token after whitespace", func() { + content := " Reasoning" + result := PrependThinkingTokenIfNeeded(content, "") + Expect(result).To(Equal(content)) + }) + + It("should not prepend if token appears anywhere in content", func() { + content := "Some text Reasoning" + result := PrependThinkingTokenIfNeeded(content, "") + // With Contains check, it should not prepend + Expect(result).To(Equal(content)) + }) + }) + + Context("when content does not contain token", func() { + It("should prepend token to content", func() { + content := "Reasoning content" + result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>") + Expect(result).To(Equal("<|START_THINKING|>Reasoning content")) + }) + + It("should prepend token after leading whitespace", func() { + content := " \n Reasoning content" + result := PrependThinkingTokenIfNeeded(content, "") + Expect(result).To(Equal(" \n Reasoning content")) + }) + + It("should handle empty content", func() { + content := "" + result := PrependThinkingTokenIfNeeded(content, "") + Expect(result).To(Equal("")) + }) + + It("should handle content with only whitespace", func() { + content := " \n\t " + result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>") + Expect(result).To(Equal(" \n\t <|START_THINKING|>")) + }) + }) + + Context("with different token types", func() { + It("should prepend <|START_THINKING|>", func() { + content := "Reasoning" + result := PrependThinkingTokenIfNeeded(content, "<|START_THINKING|>") + Expect(result).To(Equal("<|START_THINKING|>Reasoning")) + }) + + It("should prepend ", func() { + content := "Reasoning" 
+ result := PrependThinkingTokenIfNeeded(content, "") + Expect(result).To(Equal("Reasoning")) + }) + + It("should prepend ", func() { + content := "Reasoning" + result := PrependThinkingTokenIfNeeded(content, "") + Expect(result).To(Equal("Reasoning")) + }) + + It("should prepend [THINK]", func() { + content := "Reasoning" + result := PrependThinkingTokenIfNeeded(content, "[THINK]") + Expect(result).To(Equal("[THINK]Reasoning")) + }) + }) }) diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go index dcda6c4e6..e4d7b921d 100644 --- a/pkg/xsysinfo/gpu.go +++ b/pkg/xsysinfo/gpu.go @@ -569,7 +569,7 @@ func getIntelGPUTop() []GPUMemoryInfo { cmd.Stderr = &stderr if err := cmd.Run(); err != nil { - xlog.Debug("intel_gpu_top failed", "error", err, "stderr", stderr.String()) + xlog.Debug("intel_gpu_top failed", "error", err, "stderr", stderr.String(), "stdout", stdout.String()) return nil }