package reasoning import "strings" // ReasoningExtractor tracks streaming reasoning extraction state, computing // incremental deltas so callers don't need to duplicate the ~30-line // accumulated-content / last-emitted tracking logic. // // Usage: // // extractor := NewReasoningExtractor(thinkingStartToken, cfg) // // In your streaming token callback: // reasoningDelta, contentDelta := extractor.ProcessToken(token) // // After streaming completes: // finalReasoning := extractor.Reasoning() // finalContent := extractor.CleanedContent() type ReasoningExtractor struct { thinkingStartToken string config Config accumulated string lastReasoning string lastCleaned string suppressReasoning bool // ChatDelta reasoning accumulator — used by ProcessChatDeltaReasoning // to strip reasoning tags (e.g. <|channel>thought, ) that // the C++ autoparser includes in reasoning_content deltas. cdReasoningAccum string cdLastStrippedReasoning string } // NewReasoningExtractor creates a new extractor for the given thinking token and config. func NewReasoningExtractor(thinkingStartToken string, cfg Config) *ReasoningExtractor { return &ReasoningExtractor{ thinkingStartToken: thinkingStartToken, config: cfg, } } // ProcessToken processes a new streaming token and returns the reasoning // and content deltas (the new portions not yet emitted). func (e *ReasoningExtractor) ProcessToken(token string) (reasoningDelta, contentDelta string) { e.accumulated += token currentReasoning, cleanedContent := ExtractReasoningWithConfig(e.accumulated, e.thinkingStartToken, e.config) // Calculate reasoning delta if currentReasoning != e.lastReasoning { if len(currentReasoning) > len(e.lastReasoning) && strings.HasPrefix(currentReasoning, e.lastReasoning) { reasoningDelta = currentReasoning[len(e.lastReasoning):] } else if currentReasoning != "" { // Reasoning changed in a non-append way, emit the full current reasoning reasoningDelta = currentReasoning } e.lastReasoning = currentReasoning } // Calculate content delta if len(cleanedContent) > len(e.lastCleaned) && strings.HasPrefix(cleanedContent, e.lastCleaned) { contentDelta = cleanedContent[len(e.lastCleaned):] e.lastCleaned = cleanedContent } else if cleanedContent != e.lastCleaned { contentDelta = cleanedContent e.lastCleaned = cleanedContent } if e.suppressReasoning { reasoningDelta = "" } return reasoningDelta, contentDelta } // ProcessChatDeltaReasoning accumulates raw reasoning text from C++ autoparser // ChatDeltas, strips any embedded reasoning tags (e.g. <|channel>thought … // for Gemma 4), and returns only the new stripped delta. // This prevents tag tokens from leaking into the reasoning field of SSE chunks. // // When the C++ autoparser already strips tags (e.g. models), the text // passes through unchanged — ExtractReasoning finds no tags so we use the raw text. func (e *ReasoningExtractor) ProcessChatDeltaReasoning(rawDelta string) string { if rawDelta == "" { return "" } e.cdReasoningAccum += rawDelta // Try to strip reasoning tags from accumulated ChatDelta reasoning. stripped, cleaned := ExtractReasoning(e.cdReasoningAccum, &e.config) if stripped == "" { // ExtractReasoning found no reasoning content. This happens when: // a) A complete start tag was found but has no content after it yet // (cleaned == "" because everything is inside the unclosed tag) // → keep buffering // b) We're accumulating a partial multi-token start tag // (e.g. "<|channel>" before "thought" arrives) // → keep buffering // c) No tags at all — C++ already stripped them // → pass through the raw text as-is if cleaned == "" && strings.TrimSpace(e.cdReasoningAccum) != "" { // Case (a): tag found, unclosed, no content yet stripped = "" } else if e.thinkingStartToken != "" && len(strings.TrimSpace(e.cdReasoningAccum)) < len(e.thinkingStartToken) && strings.HasPrefix(e.thinkingStartToken, strings.TrimSpace(e.cdReasoningAccum)) { // Case (b): partial start tag prefix stripped = "" } else { // Case (c): no tags found — text is already clean from C++ stripped = e.cdReasoningAccum } } // Compute delta from stripped reasoning var delta string if len(stripped) > len(e.cdLastStrippedReasoning) && strings.HasPrefix(stripped, e.cdLastStrippedReasoning) { delta = stripped[len(e.cdLastStrippedReasoning):] } else if stripped != e.cdLastStrippedReasoning && stripped != "" { delta = stripped } e.cdLastStrippedReasoning = stripped if e.suppressReasoning { return "" } return delta } // Reasoning returns the total accumulated reasoning after streaming. func (e *ReasoningExtractor) Reasoning() string { return e.lastReasoning } // CleanedContent returns the total accumulated content (reasoning stripped). func (e *ReasoningExtractor) CleanedContent() string { return e.lastCleaned } // Accumulated returns the total raw accumulated content. func (e *ReasoningExtractor) Accumulated() string { return e.accumulated } // Reset clears the extractor state for reuse. func (e *ReasoningExtractor) Reset() { e.accumulated = "" e.lastReasoning = "" e.lastCleaned = "" e.cdReasoningAccum = "" e.cdLastStrippedReasoning = "" } // ResetAndSuppressReasoning clears state and suppresses future reasoning deltas. // ProcessToken() still extracts reasoning internally (CleanedContent works), // but returns empty reasoningDelta — reasoning is not surfaced to the caller. // This is used on retry after streaming: reasoning from the first attempt was // already sent to the client; re-streaming it would cause duplicates. func (e *ReasoningExtractor) ResetAndSuppressReasoning() { e.accumulated = "" e.lastReasoning = "" e.lastCleaned = "" e.cdReasoningAccum = "" e.cdLastStrippedReasoning = "" e.suppressReasoning = true } // Suppressed returns whether reasoning delta suppression is active. func (e *ReasoningExtractor) Suppressed() bool { return e.suppressReasoning }