mirror of
https://github.com/mudler/LocalAI.git
synced 2026-01-29 00:32:44 -05:00
feat(openresponses): Support reasoning blocks (#8133)
* feat(openresponses): support reasoning blocks
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* allow to disable reasoning, refactor common logic
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Add option to only strip reasoning
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Add configurations for custom reasoning tokens
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
34e054f607
commit
c491c6ca90
@@ -1,5 +1,15 @@
|
||||
package reasoning
|
||||
|
||||
type Config struct {
|
||||
DisableReasoningTagPrefill *bool `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"`
|
||||
// TagPair describes one pair of delimiters that enclose a reasoning block:
// an opening tag and its matching closing tag.
// ExtractReasoning tries configured pairs ahead of its built-in defaults and
// ignores any pair whose Start or End is empty.
|
||||
type TagPair struct {
|
||||
// Start is the opening tag (serialized under the key "start").
Start string `yaml:"start" json:"start"`
|
||||
// End is the closing tag (serialized under the key "end").
End string `yaml:"end" json:"end"`
|
||||
}
|
||||
|
||||
// Config groups the options read by the reasoning helpers
// (DetectThinkingStartToken, ExtractReasoning, ExtractReasoningWithConfig).
// The *bool fields are tri-state: the helpers treat nil the same as false.
type Config struct {
|
||||
// DisableReasoningTagPrefill, when true, stops ExtractReasoningWithConfig
// from prepending the detected thinking start token before extraction.
DisableReasoningTagPrefill *bool `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"`
|
||||
// DisableReasoning, when true, makes ExtractReasoningWithConfig return the
// content untouched with an empty reasoning string.
DisableReasoning *bool `yaml:"disable,omitempty" json:"disable,omitempty"`
|
||||
// StripReasoningOnly, when true, still runs extraction (so the returned
// content is the cleaned one) but blanks the returned reasoning.
StripReasoningOnly *bool `yaml:"strip_reasoning_only,omitempty" json:"strip_reasoning_only,omitempty"`
|
||||
// ThinkingStartTokens lists extra start tokens that
// DetectThinkingStartToken checks before its default tokens.
ThinkingStartTokens []string `yaml:"thinking_start_tokens,omitempty" json:"thinking_start_tokens,omitempty"`
|
||||
// TagPairs lists extra start/end pairs that ExtractReasoning checks before
// its default pairs; entries with an empty Start or End are skipped.
TagPairs []TagPair `yaml:"tag_pairs,omitempty" json:"tag_pairs,omitempty"`
|
||||
}
|
||||
|
||||
@@ -17,12 +17,12 @@ import (
|
||||
// - <think> (DeepSeek, Granite, ExaOne models)
|
||||
// - <|think|> (Solar Open models)
|
||||
// - <thinking> (General thinking tag)
|
||||
// - <think> (GLM models)
|
||||
// - [THINK] (Magistral models)
|
||||
func DetectThinkingStartToken(prompt string) string {
|
||||
// Custom tokens from config are checked first, then default tokens.
|
||||
func DetectThinkingStartToken(prompt string, config *Config) string {
|
||||
// Common thinking start tokens (in order of specificity - longer first)
|
||||
// Based on llama.cpp's chat-parser.cpp implementations
|
||||
thinkingStartTokens := []string{
|
||||
defaultTokens := []string{
|
||||
"<|START_THINKING|>", // Command-R models
|
||||
"<|inner_prefix|>", // Apertus models
|
||||
"<seed:think>", // Seed models
|
||||
@@ -32,6 +32,13 @@ func DetectThinkingStartToken(prompt string) string {
|
||||
"[THINK]", // Magistral models
|
||||
}
|
||||
|
||||
// Merge custom tokens with default tokens (custom tokens first for priority)
|
||||
var thinkingStartTokens []string
|
||||
if config != nil && len(config.ThinkingStartTokens) > 0 {
|
||||
thinkingStartTokens = append(thinkingStartTokens, config.ThinkingStartTokens...)
|
||||
}
|
||||
thinkingStartTokens = append(thinkingStartTokens, defaultTokens...)
|
||||
|
||||
// Check if prompt ends with any of these tokens (allowing for trailing whitespace/newlines)
|
||||
trimmedPrompt := strings.TrimRight(prompt, " \t\n\r")
|
||||
for _, token := range thinkingStartTokens {
|
||||
@@ -58,6 +65,28 @@ func DetectThinkingStartToken(prompt string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// ExtractReasoningWithConfig extracts reasoning from content with the given config.
|
||||
// If reasoning is disabled, it returns the original content.
|
||||
// If thinking start token prefill is enabled, it prepends the thinking start token to the content.
|
||||
// It returns the extracted reasoning and the cleaned content.
|
||||
func ExtractReasoningWithConfig(content, thinkingStartToken string, config Config) (reasoning string, cleanedContent string) {
|
||||
cleanedContent = content
|
||||
// If reasoning is not disabled, prepend the thinking start token if needed and extract reasoning
|
||||
if config.DisableReasoning == nil || !*config.DisableReasoning {
|
||||
// If thinking start token prefill is not disabled, prepend the thinking start token
|
||||
if config.DisableReasoningTagPrefill == nil || !*config.DisableReasoningTagPrefill {
|
||||
cleanedContent = PrependThinkingTokenIfNeeded(cleanedContent, thinkingStartToken)
|
||||
}
|
||||
// Extract reasoning from the cleaned content
|
||||
reasoning, cleanedContent = ExtractReasoning(cleanedContent, &config)
|
||||
if config.StripReasoningOnly != nil && *config.StripReasoningOnly {
|
||||
reasoning = ""
|
||||
}
|
||||
}
|
||||
|
||||
return reasoning, cleanedContent
|
||||
}
|
||||
|
||||
// PrependThinkingTokenIfNeeded prepends the thinking start token to content if it was
|
||||
// detected in the prompt. This allows the standard extraction logic to work correctly
|
||||
// for models where the thinking token is already in the prompt.
|
||||
@@ -97,7 +126,8 @@ func PrependThinkingTokenIfNeeded(content string, startToken string) string {
|
||||
// both the extracted reasoning and the cleaned content (with tags removed).
|
||||
// It handles <thinking>...</thinking> and <think>...</think> tags.
|
||||
// Multiple reasoning blocks are concatenated with newlines.
|
||||
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
|
||||
// Custom tag pairs from config are checked first, then default tag pairs.
|
||||
func ExtractReasoning(content string, config *Config) (reasoning string, cleanedContent string) {
|
||||
if content == "" {
|
||||
return "", content
|
||||
}
|
||||
@@ -106,8 +136,8 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
|
||||
var cleanedParts []string
|
||||
remaining := content
|
||||
|
||||
// Define tag pairs to look for (matching llama.cpp's chat-parser.cpp)
|
||||
tagPairs := []struct {
|
||||
// Define default tag pairs to look for (matching llama.cpp's chat-parser.cpp)
|
||||
defaultTagPairs := []struct {
|
||||
start string
|
||||
end string
|
||||
}{
|
||||
@@ -120,6 +150,26 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
|
||||
{"[THINK]", "[/THINK]"}, // Magistral models
|
||||
}
|
||||
|
||||
// Merge custom tag pairs with default tag pairs (custom pairs first for priority)
|
||||
var tagPairs []struct {
|
||||
start string
|
||||
end string
|
||||
}
|
||||
if config != nil && len(config.TagPairs) > 0 {
|
||||
for _, pair := range config.TagPairs {
|
||||
if pair.Start != "" && pair.End != "" {
|
||||
tagPairs = append(tagPairs, struct {
|
||||
start string
|
||||
end string
|
||||
}{pair.Start, pair.End})
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add default tag pairs
|
||||
for _, pair := range defaultTagPairs {
|
||||
tagPairs = append(tagPairs, pair)
|
||||
}
|
||||
|
||||
// Track the last position we've processed
|
||||
lastPos := 0
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user