mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-18 13:58:07 -04:00
fix(reasoning): accumulate and strip reasoning tags from autoparser results (#9227)
fix(reasoning): accumulate and strip reasoning tags from autoparser results Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
6f304d1201
commit
6d9d77d590
@@ -86,7 +86,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
|
||||
// Prefer pre-parsed chat deltas from C++ autoparser when available
|
||||
if tokenUsage.HasChatDeltaContent() {
|
||||
reasoningDelta, contentDelta = tokenUsage.ChatDeltaReasoningAndContent()
|
||||
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
|
||||
contentDelta = cd
|
||||
// Strip reasoning tags (e.g. <|channel>thought / <channel|>) that
|
||||
// the C++ autoparser includes as part of reasoning content.
|
||||
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
|
||||
// Keep extractor state consistent for fallback
|
||||
extractor.ProcessToken(s)
|
||||
} else {
|
||||
@@ -149,7 +153,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
|
||||
// Prefer pre-parsed chat deltas from C++ autoparser when available
|
||||
if usage.HasChatDeltaContent() {
|
||||
reasoningDelta, contentDelta = usage.ChatDeltaReasoningAndContent()
|
||||
rawReasoning, cd := usage.ChatDeltaReasoningAndContent()
|
||||
contentDelta = cd
|
||||
// Strip reasoning tags (e.g. <|channel>thought / <channel|>) that
|
||||
// the C++ autoparser includes as part of reasoning content.
|
||||
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
|
||||
// Keep extractor state consistent for fallback
|
||||
extractor.ProcessToken(s)
|
||||
} else {
|
||||
|
||||
@@ -1823,7 +1823,9 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
|
||||
var reasoningDelta, contentDelta string
|
||||
// Prefer pre-parsed chat deltas from C++ autoparser when available
|
||||
if tokenUsage.HasChatDeltaContent() {
|
||||
reasoningDelta, contentDelta = tokenUsage.ChatDeltaReasoningAndContent()
|
||||
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
|
||||
contentDelta = cd
|
||||
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
|
||||
extractor.ProcessToken(token) // keep state consistent
|
||||
} else {
|
||||
reasoningDelta, contentDelta = extractor.ProcessToken(token)
|
||||
@@ -2350,7 +2352,9 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
|
||||
var reasoningDelta, contentDelta string
|
||||
// Prefer pre-parsed chat deltas from C++ autoparser when available
|
||||
if tokenUsage.HasChatDeltaContent() {
|
||||
reasoningDelta, contentDelta = tokenUsage.ChatDeltaReasoningAndContent()
|
||||
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
|
||||
contentDelta = cd
|
||||
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
|
||||
extractor.ProcessToken(token) // keep state consistent
|
||||
} else {
|
||||
reasoningDelta, contentDelta = extractor.ProcessToken(token)
|
||||
|
||||
@@ -21,6 +21,12 @@ type ReasoningExtractor struct {
|
||||
lastReasoning string
|
||||
lastCleaned string
|
||||
suppressReasoning bool
|
||||
|
||||
// ChatDelta reasoning accumulator — used by ProcessChatDeltaReasoning
|
||||
// to strip reasoning tags (e.g. <|channel>thought, <channel|>) that
|
||||
// the C++ autoparser includes in reasoning_content deltas.
|
||||
cdReasoningAccum string
|
||||
cdLastStrippedReasoning string
|
||||
}
|
||||
|
||||
// NewReasoningExtractor creates a new extractor for the given thinking token and config.
|
||||
@@ -64,6 +70,61 @@ func (e *ReasoningExtractor) ProcessToken(token string) (reasoningDelta, content
|
||||
return reasoningDelta, contentDelta
|
||||
}
|
||||
|
||||
// ProcessChatDeltaReasoning accumulates raw reasoning text from C++ autoparser
|
||||
// ChatDeltas, strips any embedded reasoning tags (e.g. <|channel>thought …
|
||||
// <channel|> for Gemma 4), and returns only the new stripped delta.
|
||||
// This prevents tag tokens from leaking into the reasoning field of SSE chunks.
|
||||
//
|
||||
// When the C++ autoparser already strips tags (e.g. <think> models), the text
|
||||
// passes through unchanged — ExtractReasoning finds no tags so we use the raw text.
|
||||
func (e *ReasoningExtractor) ProcessChatDeltaReasoning(rawDelta string) string {
|
||||
if rawDelta == "" {
|
||||
return ""
|
||||
}
|
||||
e.cdReasoningAccum += rawDelta
|
||||
|
||||
// Try to strip reasoning tags from accumulated ChatDelta reasoning.
|
||||
stripped, cleaned := ExtractReasoning(e.cdReasoningAccum, &e.config)
|
||||
|
||||
if stripped == "" {
|
||||
// ExtractReasoning found no reasoning content. This happens when:
|
||||
// a) A complete start tag was found but has no content after it yet
|
||||
// (cleaned == "" because everything is inside the unclosed tag)
|
||||
// → keep buffering
|
||||
// b) We're accumulating a partial multi-token start tag
|
||||
// (e.g. "<|channel>" before "thought" arrives)
|
||||
// → keep buffering
|
||||
// c) No tags at all — C++ already stripped them
|
||||
// → pass through the raw text as-is
|
||||
if cleaned == "" && strings.TrimSpace(e.cdReasoningAccum) != "" {
|
||||
// Case (a): tag found, unclosed, no content yet
|
||||
stripped = ""
|
||||
} else if e.thinkingStartToken != "" &&
|
||||
len(strings.TrimSpace(e.cdReasoningAccum)) < len(e.thinkingStartToken) &&
|
||||
strings.HasPrefix(e.thinkingStartToken, strings.TrimSpace(e.cdReasoningAccum)) {
|
||||
// Case (b): partial start tag prefix
|
||||
stripped = ""
|
||||
} else {
|
||||
// Case (c): no tags found — text is already clean from C++
|
||||
stripped = e.cdReasoningAccum
|
||||
}
|
||||
}
|
||||
|
||||
// Compute delta from stripped reasoning
|
||||
var delta string
|
||||
if len(stripped) > len(e.cdLastStrippedReasoning) && strings.HasPrefix(stripped, e.cdLastStrippedReasoning) {
|
||||
delta = stripped[len(e.cdLastStrippedReasoning):]
|
||||
} else if stripped != e.cdLastStrippedReasoning && stripped != "" {
|
||||
delta = stripped
|
||||
}
|
||||
e.cdLastStrippedReasoning = stripped
|
||||
|
||||
if e.suppressReasoning {
|
||||
return ""
|
||||
}
|
||||
return delta
|
||||
}
|
||||
|
||||
// Reasoning returns the total accumulated reasoning after streaming.
|
||||
func (e *ReasoningExtractor) Reasoning() string {
|
||||
return e.lastReasoning
|
||||
@@ -84,6 +145,8 @@ func (e *ReasoningExtractor) Reset() {
|
||||
e.accumulated = ""
|
||||
e.lastReasoning = ""
|
||||
e.lastCleaned = ""
|
||||
e.cdReasoningAccum = ""
|
||||
e.cdLastStrippedReasoning = ""
|
||||
}
|
||||
|
||||
// ResetAndSuppressReasoning clears state and suppresses future reasoning deltas.
|
||||
@@ -95,6 +158,8 @@ func (e *ReasoningExtractor) ResetAndSuppressReasoning() {
|
||||
e.accumulated = ""
|
||||
e.lastReasoning = ""
|
||||
e.lastCleaned = ""
|
||||
e.cdReasoningAccum = ""
|
||||
e.cdLastStrippedReasoning = ""
|
||||
e.suppressReasoning = true
|
||||
}
|
||||
|
||||
|
||||
@@ -195,4 +195,91 @@ var _ = Describe("ReasoningExtractor", func() {
|
||||
Expect(ext.CleanedContent()).To(Equal("visible content"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("ProcessChatDeltaReasoning with Gemma 4 tags", func() {
|
||||
It("should strip <|channel>thought and <channel|> tags from streaming deltas", func() {
|
||||
ext := NewReasoningExtractor("<|channel>thought", Config{})
|
||||
|
||||
// Simulate C++ autoparser sending tag tokens as reasoning
|
||||
d1 := ext.ProcessChatDeltaReasoning("<|channel>")
|
||||
Expect(d1).To(BeEmpty(), "start tag prefix should be buffered, not emitted")
|
||||
|
||||
d2 := ext.ProcessChatDeltaReasoning("thought")
|
||||
Expect(d2).To(BeEmpty(), "start tag suffix should be buffered, not emitted")
|
||||
|
||||
d3 := ext.ProcessChatDeltaReasoning("\n")
|
||||
Expect(d3).To(BeEmpty(), "newline after start tag should not emit yet")
|
||||
|
||||
d4 := ext.ProcessChatDeltaReasoning("The")
|
||||
Expect(d4).To(Equal("The"))
|
||||
|
||||
d5 := ext.ProcessChatDeltaReasoning(" user")
|
||||
Expect(d5).To(Equal(" user"))
|
||||
|
||||
d6 := ext.ProcessChatDeltaReasoning(" asks")
|
||||
Expect(d6).To(Equal(" asks"))
|
||||
|
||||
// Trailing newline gets TrimSpaced by ExtractReasoning,
|
||||
// so it appears delayed with the next non-whitespace token
|
||||
d7 := ext.ProcessChatDeltaReasoning("\n")
|
||||
Expect(d7).To(BeEmpty(), "trailing newline is buffered by TrimSpace")
|
||||
|
||||
d8 := ext.ProcessChatDeltaReasoning("2+2=4")
|
||||
Expect(d8).To(Equal("\n2+2=4"), "delayed newline emitted with next content")
|
||||
|
||||
d9 := ext.ProcessChatDeltaReasoning("<channel|>")
|
||||
Expect(d9).To(BeEmpty(), "close tag should be consumed, not emitted")
|
||||
})
|
||||
|
||||
It("should handle empty deltas", func() {
|
||||
ext := NewReasoningExtractor("<|channel>thought", Config{})
|
||||
d := ext.ProcessChatDeltaReasoning("")
|
||||
Expect(d).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should pass through reasoning without tags unchanged", func() {
|
||||
ext := NewReasoningExtractor("<think>", Config{})
|
||||
|
||||
// When C++ autoparser already strips tags (e.g. <think> models),
|
||||
// reasoning arrives clean — just pass it through.
|
||||
d1 := ext.ProcessChatDeltaReasoning("I need to")
|
||||
Expect(d1).To(Equal("I need to"))
|
||||
|
||||
d2 := ext.ProcessChatDeltaReasoning(" think carefully")
|
||||
Expect(d2).To(Equal(" think carefully"))
|
||||
})
|
||||
|
||||
It("should strip <think> tags if C++ autoparser includes them", func() {
|
||||
ext := NewReasoningExtractor("<think>", Config{})
|
||||
|
||||
d1 := ext.ProcessChatDeltaReasoning("<think>")
|
||||
Expect(d1).To(BeEmpty())
|
||||
|
||||
d2 := ext.ProcessChatDeltaReasoning("reasoning")
|
||||
Expect(d2).To(Equal("reasoning"))
|
||||
|
||||
d3 := ext.ProcessChatDeltaReasoning("</think>")
|
||||
Expect(d3).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should respect suppressReasoning", func() {
|
||||
ext := NewReasoningExtractor("<|channel>thought", Config{})
|
||||
ext.ResetAndSuppressReasoning()
|
||||
|
||||
d := ext.ProcessChatDeltaReasoning("some reasoning")
|
||||
Expect(d).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should reset ChatDelta state on Reset", func() {
|
||||
ext := NewReasoningExtractor("<|channel>thought", Config{})
|
||||
|
||||
ext.ProcessChatDeltaReasoning("<|channel>thought")
|
||||
ext.ProcessChatDeltaReasoning("\nfirst reasoning")
|
||||
ext.Reset()
|
||||
|
||||
// After reset, should start fresh
|
||||
d := ext.ProcessChatDeltaReasoning("clean reasoning")
|
||||
Expect(d).To(Equal("clean reasoning"))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user