diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 2075a0368..b35f2bab8 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -47,7 +47,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		} else {
 			template = s
 		}
-		thinkingStartToken := reason.DetectThinkingStartToken(template)
+		thinkingStartToken := reason.DetectThinkingStartToken(template, &config.ReasoningConfig)
 
 		// Track accumulated content for reasoning extraction
 		accumulatedContent := ""
@@ -56,12 +56,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 
 		_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
 			accumulatedContent += s
-			content := accumulatedContent
-			// Prepend thinking token if needed, then extract reasoning
-			if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
-				content = reason.PrependThinkingTokenIfNeeded(content, thinkingStartToken)
-			}
-			currentReasoning, cleanedContent := reason.ExtractReasoning(content)
+
+			currentReasoning, cleanedContent := reason.ExtractReasoningWithConfig(accumulatedContent, thinkingStartToken, config.ReasoningConfig)
 
 			// Calculate new reasoning delta (what we haven't emitted yet)
 			var reasoningDelta *string
@@ -140,7 +136,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		} else {
 			template = prompt
 		}
-		thinkingStartToken := reason.DetectThinkingStartToken(template)
+		thinkingStartToken := reason.DetectThinkingStartToken(template, &config.ReasoningConfig)
 
 		result := ""
 		lastEmittedCount := 0
@@ -254,12 +250,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			return err
 		}
 		// Prepend thinking token if needed, then extract reasoning before processing tool calls
-		resultWithToken := result
-		if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
-			resultWithToken = reason.PrependThinkingTokenIfNeeded(result, thinkingStartToken)
-		}
-		reasoning, cleanedResult := reason.ExtractReasoning(resultWithToken)
-		result = cleanedResult
+		reasoning, result := reason.ExtractReasoningWithConfig(result, thinkingStartToken, config.ReasoningConfig)
 
 		textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
 		result = functions.CleanupLLMResult(result, config.FunctionsConfig)
@@ -652,18 +643,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			} else {
 				template = predInput
 			}
-			thinkingStartToken := reason.DetectThinkingStartToken(template)
+			thinkingStartToken := reason.DetectThinkingStartToken(template, &config.ReasoningConfig)
 
 			xlog.Debug("Thinking start token", "thinkingStartToken", thinkingStartToken, "template", template)
 
 			tokenCallback := func(s string, c *[]schema.Choice) {
 				// Prepend thinking token if needed, then extract reasoning from the response
-				sWithToken := s
-				if config.ReasoningConfig.DisableReasoningTagPrefill == nil || !*config.ReasoningConfig.DisableReasoningTagPrefill {
-					sWithToken = reason.PrependThinkingTokenIfNeeded(s, thinkingStartToken)
-				}
-				reasoning, cleanedS := reason.ExtractReasoning(sWithToken)
-				s = cleanedS
+				reasoning, s := reason.ExtractReasoningWithConfig(s, thinkingStartToken, config.ReasoningConfig)
 
 				if !shouldUseFn {
 					// no function is called, just reply and use stop as finish reason
diff --git a/core/http/endpoints/openresponses/responses.go b/core/http/endpoints/openresponses/responses.go
index 337978506..aa2702dcf 100644
--- a/core/http/endpoints/openresponses/responses.go
+++ b/core/http/endpoints/openresponses/responses.go
@@ -6,6 +6,7 @@ import (
 	"errors"
 	"fmt"
 	"net"
+	"strings"
 	"time"
 
 	"github.com/google/uuid"
@@ -18,6 +19,7 @@ import (
 	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/functions"
 	"github.com/mudler/LocalAI/pkg/model"
+	reason "github.com/mudler/LocalAI/pkg/reasoning"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/mudler/cogito"
 	"github.com/mudler/xlog"
@@ -1330,13 +1332,37 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i
 	result := backend.Finetune(*cfg, predInput, prediction.Response)
 	xlog.Debug("Open Responses - Raw model result", "result", result, "shouldUseFn", shouldUseFn)
 
+	// Detect if thinking token is already in prompt or template
+	var template string
+	if cfg.TemplateConfig.UseTokenizerTemplate {
+		template = cfg.GetModelTemplate()
+	} else {
+		template = predInput
+	}
+	thinkingStartToken := reason.DetectThinkingStartToken(template, &cfg.ReasoningConfig)
+
+	// Extract reasoning from result before cleaning
+	reasoningContent, cleanedResult := reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)
+
 	// Parse tool calls if using functions
 	var outputItems []schema.ORItemField
 	var toolCalls []schema.ToolCall
 
+	// Add reasoning item if reasoning was found (reasoning comes first per spec)
+	if reasoningContent != "" {
+		reasoningItem := schema.ORItemField{
+			Type:    "reasoning",
+			ID:      fmt.Sprintf("reasoning_%s", uuid.New().String()),
+			Status:  "completed",
+			Content: []schema.ORContentPart{makeOutputTextPart(reasoningContent)},
+		}
+		outputItems = append(outputItems, reasoningItem)
+		xlog.Debug("Open Responses - Extracted reasoning", "reasoning_length", len(reasoningContent))
+	}
+
 	if shouldUseFn {
-		// Clean up the result first (handle reasoning tags, etc.)
-		cleanedResult := functions.CleanupLLMResult(result, cfg.FunctionsConfig)
+		// Clean up the result (already extracted reasoning above)
+		cleanedResult = functions.CleanupLLMResult(cleanedResult, cfg.FunctionsConfig)
 		xlog.Debug("Open Responses - Cleaned result", "cleanedResult", cleanedResult)
 
 		funcCallResults := functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig)
@@ -1398,28 +1424,46 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i
 			})
 		}
 
-		// If we have no output items but the model did produce output, include the raw result as a message
+		// If we have no output items but the model did produce output, include the cleaned result as a message
 		// This handles cases where the function call parsing failed but we still have model output
-		if len(outputItems) == 0 && result != "" {
-			xlog.Debug("Open Responses - No parsed output, falling back to raw result")
+		// Note: reasoning item may already be added above
+		hasMessageItem := false
+		for _, item := range outputItems {
+			if item.Type == "message" {
+				hasMessageItem = true
+				break
+			}
+		}
+		if !hasMessageItem && cleanedResult != "" {
+			xlog.Debug("Open Responses - No parsed output, falling back to cleaned result")
 			outputItems = append(outputItems, schema.ORItemField{
 				Type:    "message",
 				ID:      fmt.Sprintf("msg_%s", uuid.New().String()),
 				Status:  "completed",
 				Role:    "assistant",
-				Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(result, prediction.Logprobs)},
+				Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(cleanedResult, prediction.Logprobs)},
 			})
 		}
 	} else {
 		// Simple text response (include logprobs if available)
-		outputItems = []schema.ORItemField{
-			{
-				Type:    "message",
-				ID:      fmt.Sprintf("msg_%s", uuid.New().String()),
-				Status:  "completed",
-				Role:    "assistant",
-				Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(result, prediction.Logprobs)},
-			},
+		// Note: reasoning item may already be added above
+		messageItem := schema.ORItemField{
+			Type:    "message",
+			ID:      fmt.Sprintf("msg_%s", uuid.New().String()),
+			Status:  "completed",
+			Role:    "assistant",
+			Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(cleanedResult, prediction.Logprobs)},
+		}
+		outputItems = append(outputItems, messageItem)
+	}
+
+	// Estimate reasoning tokens (approximate: byte length of the reasoning text / 4)
+	reasoningTokens := 0
+	if reasoningContent != "" {
+		// Simple estimation: ~4 characters per token
+		reasoningTokens = len(reasoningContent) / 4
+		if reasoningTokens == 0 && len(reasoningContent) > 0 {
+			reasoningTokens = 1
 		}
 	}
 
@@ -1429,6 +1473,9 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i
 		InputTokens:  prediction.Usage.Prompt,
 		OutputTokens: prediction.Usage.Completion,
 		TotalTokens:  prediction.Usage.Prompt + prediction.Usage.Completion,
+		OutputTokensDetails: &schema.OROutputTokensDetails{
+			ReasoningTokens: reasoningTokens,
+		},
 	}, shouldStore)
 
 	// Store response for future reference (if enabled)
@@ -1484,6 +1531,15 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 		}
 	}
 
+	// Detect if thinking token is already in prompt or template
+	var template string
+	if cfg.TemplateConfig.UseTokenizerTemplate {
+		template = cfg.GetModelTemplate()
+	} else {
+		template = predInput
+	}
+	thinkingStartToken := reason.DetectThinkingStartToken(template, &cfg.ReasoningConfig)
+
 	// Track state for streaming
 	var currentMessageID string
 	var currentContentIndex int
@@ -1492,6 +1548,14 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 	outputIndex := 0
 	inToolCallMode := false
 
+	// Track reasoning state for streaming
+	var currentReasoningID string
+	var currentReasoningContentIndex int
+	var accumulatedContent string
+	var lastEmittedReasoning string
+	var lastEmittedCleanedContent string
+	var reasoningTokens int
+
 	// Collect all output items for storage
 	var collectedOutputItems []schema.ORItemField
 
@@ -1646,52 +1710,133 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 				return true
 			}
 
-			// If no tool calls detected yet, emit text delta
+			// If no tool calls detected yet, handle reasoning and text
 			if !inToolCallMode {
-				if currentMessageID == "" {
-					// Emit output_item.added for message
-					currentMessageID = fmt.Sprintf("msg_%s", uuid.New().String())
-					messageItem := &schema.ORItemField{
-						Type:    "message",
-						ID:      currentMessageID,
-						Status:  "in_progress",
-						Role:    "assistant",
-						Content: []schema.ORContentPart{},
-					}
-					sendSSEEvent(c, &schema.ORStreamEvent{
-						Type:           "response.output_item.added",
-						SequenceNumber: sequenceNumber,
-						OutputIndex:    &outputIndex,
-						Item:           messageItem,
-					})
-					sequenceNumber++
+				accumulatedContent += token
+				currentReasoning, cleanedContent := reason.ExtractReasoningWithConfig(accumulatedContent, thinkingStartToken, cfg.ReasoningConfig)
 
-					// Emit content_part.added
-					currentContentIndex = 0
-					emptyPart := makeOutputTextPart("")
+				// Handle reasoning item
+				if currentReasoning != "" {
+					// Check if we need to create reasoning item
+					if currentReasoningID == "" {
+						outputIndex++
+						currentReasoningID = fmt.Sprintf("reasoning_%s", uuid.New().String())
+						reasoningItem := &schema.ORItemField{
+							Type:   "reasoning",
+							ID:     currentReasoningID,
+							Status: "in_progress",
+						}
+						sendSSEEvent(c, &schema.ORStreamEvent{
+							Type:           "response.output_item.added",
+							SequenceNumber: sequenceNumber,
+							OutputIndex:    &outputIndex,
+							Item:           reasoningItem,
+						})
+						sequenceNumber++
+
+						// Emit content_part.added for reasoning
+						currentReasoningContentIndex = 0
+						emptyPart := makeOutputTextPart("")
+						sendSSEEvent(c, &schema.ORStreamEvent{
+							Type:           "response.content_part.added",
+							SequenceNumber: sequenceNumber,
+							ItemID:         currentReasoningID,
+							OutputIndex:    &outputIndex,
+							ContentIndex:   &currentReasoningContentIndex,
+							Part:           &emptyPart,
+						})
+						sequenceNumber++
+					}
+
+					// Calculate reasoning delta
+					var reasoningDelta string
+					if len(currentReasoning) > len(lastEmittedReasoning) && strings.HasPrefix(currentReasoning, lastEmittedReasoning) {
+						reasoningDelta = currentReasoning[len(lastEmittedReasoning):]
+						lastEmittedReasoning = currentReasoning
+					} else if currentReasoning != lastEmittedReasoning {
+						reasoningDelta = currentReasoning
+						lastEmittedReasoning = currentReasoning
+					}
+
+					// Emit reasoning delta if there's new content
+					if reasoningDelta != "" {
+						sendSSEEvent(c, &schema.ORStreamEvent{
+							Type:           "response.output_text.delta",
+							SequenceNumber: sequenceNumber,
+							ItemID:         currentReasoningID,
+							OutputIndex:    &outputIndex,
+							ContentIndex:   &currentReasoningContentIndex,
+							Delta:          strPtr(reasoningDelta),
+							Logprobs:       emptyLogprobs(),
+						})
+						sequenceNumber++
+						c.Response().Flush()
+					}
+				}
+
+				// Handle message content (cleaned content without reasoning tags)
+				var deltaContent string
+				if len(cleanedContent) > len(lastEmittedCleanedContent) && strings.HasPrefix(cleanedContent, lastEmittedCleanedContent) {
+					deltaContent = cleanedContent[len(lastEmittedCleanedContent):]
+					lastEmittedCleanedContent = cleanedContent
+				} else if cleanedContent != lastEmittedCleanedContent {
+					if lastEmittedCleanedContent == "" {
+						deltaContent = cleanedContent
+						lastEmittedCleanedContent = cleanedContent
+					} else {
+						deltaContent = cleanedContent
+						lastEmittedCleanedContent = cleanedContent
+					}
+				}
+
+				// Only emit message content if there's actual content (not just reasoning)
+				if deltaContent != "" {
+					if currentMessageID == "" {
+						// Emit output_item.added for message
+						outputIndex++
+						currentMessageID = fmt.Sprintf("msg_%s", uuid.New().String())
+						messageItem := &schema.ORItemField{
+							Type:    "message",
+							ID:      currentMessageID,
+							Status:  "in_progress",
+							Role:    "assistant",
+							Content: []schema.ORContentPart{},
+						}
+						sendSSEEvent(c, &schema.ORStreamEvent{
+							Type:           "response.output_item.added",
+							SequenceNumber: sequenceNumber,
+							OutputIndex:    &outputIndex,
+							Item:           messageItem,
+						})
+						sequenceNumber++
+
+						// Emit content_part.added
+						currentContentIndex = 0
+						emptyPart := makeOutputTextPart("")
+						sendSSEEvent(c, &schema.ORStreamEvent{
+							Type:           "response.content_part.added",
+							SequenceNumber: sequenceNumber,
+							ItemID:         currentMessageID,
+							OutputIndex:    &outputIndex,
+							ContentIndex:   &currentContentIndex,
+							Part:           &emptyPart,
+						})
+						sequenceNumber++
+					}
+
+					// Emit text delta
 					sendSSEEvent(c, &schema.ORStreamEvent{
-						Type:           "response.content_part.added",
+						Type:           "response.output_text.delta",
 						SequenceNumber: sequenceNumber,
 						ItemID:         currentMessageID,
 						OutputIndex:    &outputIndex,
 						ContentIndex:   &currentContentIndex,
-						Part:           &emptyPart,
+						Delta:          strPtr(deltaContent),
+						Logprobs:       emptyLogprobs(),
 					})
 					sequenceNumber++
+					c.Response().Flush()
 				}
-
-				// Emit text delta
-				sendSSEEvent(c, &schema.ORStreamEvent{
-					Type:           "response.output_text.delta",
-					SequenceNumber: sequenceNumber,
-					ItemID:         currentMessageID,
-					OutputIndex:    &outputIndex,
-					ContentIndex:   &currentContentIndex,
-					Delta:          strPtr(token),
-					Logprobs:       emptyLogprobs(),
-				})
-				sequenceNumber++
-				c.Response().Flush()
 			}
 			return true
 		}
@@ -1754,7 +1899,62 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 		}
 
 		result := backend.Finetune(*cfg, predInput, prediction.Response)
-		cleanedResult := functions.CleanupLLMResult(result, cfg.FunctionsConfig)
+
+		// Extract reasoning from final result
+		finalReasoning, finalCleanedResult := reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)
+
+		// Close reasoning item if it exists and wasn't closed yet
+		if currentReasoningID != "" && finalReasoning != "" {
+			// Emit output_text.done for reasoning
+			sendSSEEvent(c, &schema.ORStreamEvent{
+				Type:           "response.output_text.done",
+				SequenceNumber: sequenceNumber,
+				ItemID:         currentReasoningID,
+				OutputIndex:    &outputIndex,
+				ContentIndex:   &currentReasoningContentIndex,
+				Text:           strPtr(finalReasoning),
+				Logprobs:       emptyLogprobs(),
+			})
+			sequenceNumber++
+
+			// Emit content_part.done for reasoning
+			reasoningPart := makeOutputTextPart(finalReasoning)
+			sendSSEEvent(c, &schema.ORStreamEvent{
+				Type:           "response.content_part.done",
+				SequenceNumber: sequenceNumber,
+				ItemID:         currentReasoningID,
+				OutputIndex:    &outputIndex,
+				ContentIndex:   &currentReasoningContentIndex,
+				Part:           &reasoningPart,
+			})
+			sequenceNumber++
+
+			// Emit output_item.done for reasoning
+			reasoningItem := &schema.ORItemField{
+				Type:    "reasoning",
+				ID:      currentReasoningID,
+				Status:  "completed",
+				Content: []schema.ORContentPart{reasoningPart},
+			}
+			sendSSEEvent(c, &schema.ORStreamEvent{
+				Type:           "response.output_item.done",
+				SequenceNumber: sequenceNumber,
+				OutputIndex:    &outputIndex,
+				Item:           reasoningItem,
+			})
+			sequenceNumber++
+
+			// Collect reasoning item for storage
+			collectedOutputItems = append(collectedOutputItems, *reasoningItem)
+
+			// Calculate reasoning tokens
+			reasoningTokens = len(finalReasoning) / 4
+			if reasoningTokens == 0 && len(finalReasoning) > 0 {
+				reasoningTokens = 1
+			}
+		}
+
+		cleanedResult := functions.CleanupLLMResult(finalCleanedResult, cfg.FunctionsConfig)
 		xlog.Debug("Open Responses Stream - Cleaned result", "cleanedResult", cleanedResult)
 
 		parsedToolCalls := functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig)
@@ -1789,10 +1989,10 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 		// Convert prediction logprobs for streaming events
 		streamEventLogprobs := convertLogprobsForStreaming(prediction.Logprobs)
 
-		// If we have no output but the model did produce something, use the raw result
-		if textContent == "" && len(toolCalls) == 0 && result != "" {
-			xlog.Debug("Open Responses Stream - No parsed output, using raw result")
-			textContent = result
+		// If we have no output but the model did produce something, use the cleaned result (without reasoning tags)
+		if textContent == "" && len(toolCalls) == 0 && finalCleanedResult != "" {
+			xlog.Debug("Open Responses Stream - No parsed output, using cleaned result")
+			textContent = finalCleanedResult
 		}
 
 		// Close message if we have text content
@@ -1875,8 +2075,18 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 			collectedOutputItems = append(collectedOutputItems, *functionCallItem)
 		}
 
-		// Build final response with all items (include logprobs)
+		// Build final response with all items (include reasoning first, then messages, then tool calls)
 		var allOutputItems []schema.ORItemField
+		// Add reasoning item if it exists
+		if currentReasoningID != "" && finalReasoning != "" {
+			allOutputItems = append(allOutputItems, schema.ORItemField{
+				Type:    "reasoning",
+				ID:      currentReasoningID,
+				Status:  "completed",
+				Content: []schema.ORContentPart{makeOutputTextPart(finalReasoning)},
+			})
+		}
+		// Add message item
 		if currentMessageID != "" && textContent != "" {
 			allOutputItems = append(allOutputItems, schema.ORItemField{
 				Type:    "message",
@@ -1886,6 +2096,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 				Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(textContent, prediction.Logprobs)},
 			})
 		}
+		// Add tool call items
 		for _, tc := range toolCalls {
 			toolCallID := fmt.Sprintf("fc_%s", uuid.New().String())
 			allOutputItems = append(allOutputItems, schema.ORItemField{
@@ -1904,6 +2115,9 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 			InputTokens:  prediction.Usage.Prompt,
 			OutputTokens: prediction.Usage.Completion,
 			TotalTokens:  prediction.Usage.Prompt + prediction.Usage.Completion,
+			OutputTokensDetails: &schema.OROutputTokensDetails{
+				ReasoningTokens: reasoningTokens,
+			},
 		}, shouldStore)
 
 		sendSSEEvent(c, &schema.ORStreamEvent{
@@ -1956,22 +2170,102 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 	})
 	sequenceNumber++
 
-	// Stream text deltas
+	// Stream text deltas with reasoning extraction
 	tokenCallback := func(token string, tokenUsage backend.TokenUsage) bool {
 		accumulatedText += token
+		accumulatedContent += token
+		// Prepend thinking token if needed, then extract reasoning
+		currentReasoning, cleanedContent := reason.ExtractReasoningWithConfig(accumulatedContent, thinkingStartToken, cfg.ReasoningConfig)
 
-		// Emit text delta
-		sendSSEEvent(c, &schema.ORStreamEvent{
-			Type:           "response.output_text.delta",
-			SequenceNumber: sequenceNumber,
-			ItemID:         currentMessageID,
-			OutputIndex:    &outputIndex,
-			ContentIndex:   &currentContentIndex,
-			Delta:          strPtr(token),
-			Logprobs:       emptyLogprobs(),
-		})
-		sequenceNumber++
-		c.Response().Flush()
+		// Handle reasoning item
+		if currentReasoning != "" {
+			// Check if we need to create reasoning item
+			if currentReasoningID == "" {
+				outputIndex++
+				currentReasoningID = fmt.Sprintf("reasoning_%s", uuid.New().String())
+				reasoningItem := &schema.ORItemField{
+					Type:   "reasoning",
+					ID:     currentReasoningID,
+					Status: "in_progress",
+				}
+				sendSSEEvent(c, &schema.ORStreamEvent{
+					Type:           "response.output_item.added",
+					SequenceNumber: sequenceNumber,
+					OutputIndex:    &outputIndex,
+					Item:           reasoningItem,
+				})
+				sequenceNumber++
+
+				// Emit content_part.added for reasoning
+				currentReasoningContentIndex = 0
+				emptyPart := makeOutputTextPart("")
+				sendSSEEvent(c, &schema.ORStreamEvent{
+					Type:           "response.content_part.added",
+					SequenceNumber: sequenceNumber,
+					ItemID:         currentReasoningID,
+					OutputIndex:    &outputIndex,
+					ContentIndex:   &currentReasoningContentIndex,
+					Part:           &emptyPart,
+				})
+				sequenceNumber++
+			}
+
+			// Calculate reasoning delta
+			var reasoningDelta string
+			if len(currentReasoning) > len(lastEmittedReasoning) && strings.HasPrefix(currentReasoning, lastEmittedReasoning) {
+				reasoningDelta = currentReasoning[len(lastEmittedReasoning):]
+				lastEmittedReasoning = currentReasoning
+			} else if currentReasoning != lastEmittedReasoning {
+				reasoningDelta = currentReasoning
+				lastEmittedReasoning = currentReasoning
+			}
+
+			// Emit reasoning delta if there's new content
+			if reasoningDelta != "" {
+				sendSSEEvent(c, &schema.ORStreamEvent{
+					Type:           "response.output_text.delta",
+					SequenceNumber: sequenceNumber,
+					ItemID:         currentReasoningID,
+					OutputIndex:    &outputIndex,
+					ContentIndex:   &currentReasoningContentIndex,
+					Delta:          strPtr(reasoningDelta),
+					Logprobs:       emptyLogprobs(),
+				})
+				sequenceNumber++
+				c.Response().Flush()
+			}
+		}
+
+		// Handle message content (cleaned content without reasoning tags)
+		var deltaContent string
+		if len(cleanedContent) > len(lastEmittedCleanedContent) && strings.HasPrefix(cleanedContent, lastEmittedCleanedContent) {
+			deltaContent = cleanedContent[len(lastEmittedCleanedContent):]
+			lastEmittedCleanedContent = cleanedContent
+		} else if cleanedContent != lastEmittedCleanedContent {
+			if lastEmittedCleanedContent == "" {
+				deltaContent = cleanedContent
+				lastEmittedCleanedContent = cleanedContent
+			} else {
+				deltaContent = cleanedContent
+				lastEmittedCleanedContent = cleanedContent
+			}
+		}
+
+		// Only emit message content if there's actual content (not just reasoning)
+		if deltaContent != "" {
+			// Emit text delta
+			sendSSEEvent(c, &schema.ORStreamEvent{
+				Type:           "response.output_text.delta",
+				SequenceNumber: sequenceNumber,
+				ItemID:         currentMessageID,
+				OutputIndex:    &outputIndex,
+				ContentIndex:   &currentContentIndex,
+				Delta:          strPtr(deltaContent),
+				Logprobs:       emptyLogprobs(),
+			})
+			sequenceNumber++
+			c.Response().Flush()
+		}
 		return true
 	}
 
@@ -2034,6 +2328,62 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 
 	result := backend.Finetune(*cfg, predInput, prediction.Response)
 
+	// Extract reasoning from final result for non-tool-call path
+	finalReasoning, finalCleanedResult := reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)
+
+	// Close reasoning item if it exists and wasn't closed yet
+	if currentReasoningID != "" && finalReasoning != "" {
+		// Emit output_text.done for reasoning
+		sendSSEEvent(c, &schema.ORStreamEvent{
+			Type:           "response.output_text.done",
+			SequenceNumber: sequenceNumber,
+			ItemID:         currentReasoningID,
+			OutputIndex:    &outputIndex,
+			ContentIndex:   &currentReasoningContentIndex,
+			Text:           strPtr(finalReasoning),
+			Logprobs:       emptyLogprobs(),
+		})
+		sequenceNumber++
+
+		// Emit content_part.done for reasoning
+		reasoningPart := makeOutputTextPart(finalReasoning)
+		sendSSEEvent(c, &schema.ORStreamEvent{
+			Type:           "response.content_part.done",
+			SequenceNumber: sequenceNumber,
+			ItemID:         currentReasoningID,
+			OutputIndex:    &outputIndex,
+			ContentIndex:   &currentReasoningContentIndex,
+			Part:           &reasoningPart,
+		})
+		sequenceNumber++
+
+		// Emit output_item.done for reasoning
+		reasoningItem := &schema.ORItemField{
+			Type:    "reasoning",
+			ID:      currentReasoningID,
+			Status:  "completed",
+			Content: []schema.ORContentPart{reasoningPart},
+		}
+		sendSSEEvent(c, &schema.ORStreamEvent{
+			Type:           "response.output_item.done",
+			SequenceNumber: sequenceNumber,
+			OutputIndex:    &outputIndex,
+			Item:           reasoningItem,
+		})
+		sequenceNumber++
+
+		// Collect reasoning item for storage
+		collectedOutputItems = append(collectedOutputItems, *reasoningItem)
+
+		// Calculate reasoning tokens
+		reasoningTokens = len(finalReasoning) / 4
+		if reasoningTokens == 0 && len(finalReasoning) > 0 {
+			reasoningTokens = 1
+		}
+	}
+
+	result = finalCleanedResult
+
 	// Convert prediction logprobs for streaming events
 	mcpStreamLogprobs := convertLogprobsForStreaming(prediction.Logprobs)
 
@@ -2075,17 +2425,35 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 	// Emit response.completed
 	now := time.Now().Unix()
 
-	// Collect final output items (use collected items if available, otherwise use messageItem)
+	// Collect final output items (reasoning first, then message)
 	var finalOutputItems []schema.ORItemField
+	// Add reasoning item if it exists
+	if currentReasoningID != "" && finalReasoning != "" {
+		finalOutputItems = append(finalOutputItems, schema.ORItemField{
+			Type:    "reasoning",
+			ID:      currentReasoningID,
+			Status:  "completed",
+			Content: []schema.ORContentPart{makeOutputTextPart(finalReasoning)},
+		})
+	}
+	// Add message item
 	if len(collectedOutputItems) > 0 {
-		finalOutputItems = collectedOutputItems
+		// Keep only the collected message items; the reasoning item was already appended above
+		for _, item := range collectedOutputItems {
+			if item.Type == "message" {
+				finalOutputItems = append(finalOutputItems, item)
+			}
+		}
 	} else {
-		finalOutputItems = []schema.ORItemField{*messageItem}
+		finalOutputItems = append(finalOutputItems, *messageItem)
 	}
 	responseCompleted := buildORResponse(responseID, createdAt, &now, "completed", input, finalOutputItems, &schema.ORUsage{
 		InputTokens:  prediction.Usage.Prompt,
 		OutputTokens: prediction.Usage.Completion,
 		TotalTokens:  prediction.Usage.Prompt + prediction.Usage.Completion,
+		OutputTokensDetails: &schema.OROutputTokensDetails{
+			ReasoningTokens: reasoningTokens,
+		},
 	}, shouldStore)
 	sendSSEEvent(c, &schema.ORStreamEvent{
 		Type:           "response.completed",
diff --git a/core/schema/openresponses.go b/core/schema/openresponses.go
index f6283ed97..b5a81f413 100644
--- a/core/schema/openresponses.go
+++ b/core/schema/openresponses.go
@@ -93,7 +93,12 @@ type ORItemParam struct {
 	// Function call output fields
 	Output interface{} `json:"output,omitempty"` // string or []ORContentPart
 
+	// Reasoning fields (for type == "reasoning")
+	Summary         []ORContentPart `json:"summary,omitempty"`          // Array of summary parts
+	EncryptedContent *string        `json:"encrypted_content,omitempty"` // Provider-specific encrypted content
+
 	// Note: For item_reference type, use the ID field above to reference the item
+	// Note: For reasoning type, Content field (from message fields) contains the raw reasoning content
 }
 
 // ORContentPart represents a content block (discriminated union by type)
diff --git a/docs/content/advanced/model-configuration.md b/docs/content/advanced/model-configuration.md
index b4d3b3e9e..6796a354c 100644
--- a/docs/content/advanced/model-configuration.md
+++ b/docs/content/advanced/model-configuration.md
@@ -397,6 +397,83 @@ Agent/autonomous agent configuration:
 | `agent.enable_mcp_prompts` | bool | Enable MCP prompts |
 | `agent.enable_plan_re_evaluator` | bool | Enable plan re-evaluation |
 
+## Reasoning Configuration
+
+Configure how reasoning tags are extracted and processed from model output. Reasoning tags are used by models like DeepSeek, Command-R, and others to include internal reasoning steps in their responses.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `reasoning.disable` | bool | `false` | When `true`, disables reasoning extraction entirely. The original content is returned without any processing. |
+| `reasoning.disable_reasoning_tag_prefill` | bool | `false` | When `true`, disables automatic prepending of the thinking start token detected in the prompt to the model output. Use this when your model already emits the opening reasoning tag in its output. |
+| `reasoning.strip_reasoning_only` | bool | `false` | When `true`, removes reasoning blocks (the tags and the text inside them) from the content and discards the extracted reasoning instead of returning it. Useful when you want clean output without storing the reasoning content. |
+| `reasoning.thinking_start_tokens` | array | `[]` | List of custom thinking start tokens to detect in prompts. Custom tokens are checked before default tokens. |
+| `reasoning.tag_pairs` | array | `[]` | List of custom tag pairs for reasoning extraction. Each entry has `start` and `end` fields. Custom pairs are checked before default pairs. |
+
+### Reasoning Tag Formats
+
+The reasoning extraction supports multiple tag formats used by different models:
+
+- `<thinking>...</thinking>` - General thinking tag
+- `<think>...</think>` - DeepSeek, Granite, ExaOne, GLM models
+- `<|START_THINKING|>...<|END_THINKING|>` - Command-R models
+- `<|inner_prefix|>...<|inner_suffix|>` - Apertus models
+- `<seed:think>...</seed:think>` - Seed models
+- `<|think|>...<|end|><|begin|>assistant<|content|>` - Solar Open models
+- `[THINK]...[/THINK]` - Magistral models
+
+### Examples
+
+**Disable reasoning extraction:**
+```yaml
+reasoning:
+  disable: true
+```
+
+**Extract reasoning but don't prepend tags:**
+```yaml
+reasoning:
+  disable_reasoning_tag_prefill: true
+```
+
+**Strip reasoning tags without storing reasoning content:**
+```yaml
+reasoning:
+  strip_reasoning_only: true
+```
+
+**Complete example with reasoning configuration:**
+```yaml
+name: deepseek-model
+backend: llama-cpp
+parameters:
+  model: deepseek.gguf
+
+reasoning:
+  disable: false
+  disable_reasoning_tag_prefill: false
+  strip_reasoning_only: false
+```
+
+**Example with custom tokens and tag pairs:**
+```yaml
+name: custom-reasoning-model
+backend: llama-cpp
+parameters:
+  model: custom.gguf
+
+reasoning:
+  thinking_start_tokens:
+    - "<custom:think>"
+    - "<my:reasoning>"
+  tag_pairs:
+    - start: "<custom:think>"
+      end: "</custom:think>"
+    - start: "<my:reasoning>"
+      end: "</my:reasoning>"
+```
+
+**Note:** Custom tokens and tag pairs are checked before the default ones, giving them priority. This allows you to override default behavior or add support for new reasoning tag formats.
+
 ## Pipeline Configuration
 
 Define pipelines for audio-to-audio processing:
diff --git a/pkg/reasoning/config.go b/pkg/reasoning/config.go
index 0fc23cc19..040be828d 100644
--- a/pkg/reasoning/config.go
+++ b/pkg/reasoning/config.go
@@ -1,5 +1,15 @@
 package reasoning
 
-type Config struct {
-	DisableReasoningTagPrefill *bool `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"`
+// TagPair represents a start/end tag pair for reasoning extraction
+type TagPair struct {
+	Start string `yaml:"start" json:"start"`
+	End   string `yaml:"end" json:"end"`
+}
+
+type Config struct {
+	DisableReasoningTagPrefill *bool     `yaml:"disable_reasoning_tag_prefill,omitempty" json:"disable_reasoning_tag_prefill,omitempty"`
+	DisableReasoning           *bool     `yaml:"disable,omitempty" json:"disable,omitempty"`
+	StripReasoningOnly         *bool     `yaml:"strip_reasoning_only,omitempty" json:"strip_reasoning_only,omitempty"`
+	ThinkingStartTokens        []string  `yaml:"thinking_start_tokens,omitempty" json:"thinking_start_tokens,omitempty"`
+	TagPairs                   []TagPair `yaml:"tag_pairs,omitempty" json:"tag_pairs,omitempty"`
 }
diff --git a/pkg/reasoning/reasoning.go b/pkg/reasoning/reasoning.go
index 6add81e75..b61b2ea1d 100644
--- a/pkg/reasoning/reasoning.go
+++ b/pkg/reasoning/reasoning.go
@@ -17,12 +17,12 @@ import (
 // - <think>    (DeepSeek, Granite, ExaOne models)
 // - <|think|>               (Solar Open models)
 // - <thinking>              (General thinking tag)
-// - <think>                 (GLM models)
 // - [THINK]                 (Magistral models)
-func DetectThinkingStartToken(prompt string) string {
+// Custom tokens from config are checked first, then default tokens.
+func DetectThinkingStartToken(prompt string, config *Config) string {
 	// Common thinking start tokens (in order of specificity - longer first)
 	// Based on llama.cpp's chat-parser.cpp implementations
-	thinkingStartTokens := []string{
+	defaultTokens := []string{
 		"<|START_THINKING|>", // Command-R models
 		"<|inner_prefix|>",   // Apertus models
 		"<seed:think>",       // Seed models
@@ -32,6 +32,13 @@ func DetectThinkingStartToken(prompt string) string {
 		"[THINK]",            // Magistral models
 	}
 
+	// Merge custom tokens with default tokens (custom tokens first for priority)
+	var thinkingStartTokens []string
+	if config != nil && len(config.ThinkingStartTokens) > 0 {
+		thinkingStartTokens = append(thinkingStartTokens, config.ThinkingStartTokens...)
+	}
+	thinkingStartTokens = append(thinkingStartTokens, defaultTokens...)
+
 	// Check if prompt ends with any of these tokens (allowing for trailing whitespace/newlines)
 	trimmedPrompt := strings.TrimRight(prompt, " \t\n\r")
 	for _, token := range thinkingStartTokens {
@@ -58,6 +65,28 @@ func DetectThinkingStartToken(prompt string) string {
 	return ""
 }
 
+// ExtractReasoningWithConfig extracts reasoning from content according to the given config.
+// If DisableReasoning is true, it returns empty reasoning and the original content unchanged.
+// Otherwise, unless DisableReasoningTagPrefill is true, it first prepends thinkingStartToken to the content if needed.
+// It returns the extracted reasoning (empty when StripReasoningOnly is true) and the cleaned content.
+func ExtractReasoningWithConfig(content, thinkingStartToken string, config Config) (reasoning string, cleanedContent string) {
+	cleanedContent = content
+	// If reasoning is not disabled, prepend the thinking start token if needed and extract reasoning
+	if config.DisableReasoning == nil || !*config.DisableReasoning {
+		// If thinking start token prefill is not disabled, prepend the thinking start token
+		if config.DisableReasoningTagPrefill == nil || !*config.DisableReasoningTagPrefill {
+			cleanedContent = PrependThinkingTokenIfNeeded(cleanedContent, thinkingStartToken)
+		}
+		// Extract reasoning from the cleaned content
+		reasoning, cleanedContent = ExtractReasoning(cleanedContent, &config)
+		if config.StripReasoningOnly != nil && *config.StripReasoningOnly {
+			reasoning = ""
+		}
+	}
+
+	return reasoning, cleanedContent
+}
+
 // PrependThinkingTokenIfNeeded prepends the thinking start token to content if it was
 // detected in the prompt. This allows the standard extraction logic to work correctly
 // for models where the thinking token is already in the prompt.
@@ -97,7 +126,8 @@ func PrependThinkingTokenIfNeeded(content string, startToken string) string {
 // both the extracted reasoning and the cleaned content (with tags removed).
 // It handles <thinking>...</thinking> and <think>...</think> tags.
 // Multiple reasoning blocks are concatenated with newlines.
-func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
+// Custom tag pairs from config are checked first, then default tag pairs.
+func ExtractReasoning(content string, config *Config) (reasoning string, cleanedContent string) {
 	if content == "" {
 		return "", content
 	}
@@ -106,8 +136,8 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
 	var cleanedParts []string
 	remaining := content
 
-	// Define tag pairs to look for (matching llama.cpp's chat-parser.cpp)
-	tagPairs := []struct {
+	// Define default tag pairs to look for (matching llama.cpp's chat-parser.cpp)
+	defaultTagPairs := []struct {
 		start string
 		end   string
 	}{
@@ -120,6 +150,26 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
 		{"[THINK]", "[/THINK]"},                               // Magistral models
 	}
 
+	// Merge custom tag pairs with default tag pairs (custom pairs first for priority)
+	var tagPairs []struct {
+		start string
+		end   string
+	}
+	if config != nil && len(config.TagPairs) > 0 {
+		for _, pair := range config.TagPairs {
+			if pair.Start != "" && pair.End != "" {
+				tagPairs = append(tagPairs, struct {
+					start string
+					end   string
+				}{pair.Start, pair.End})
+			}
+		}
+	}
+	// Add default tag pairs
+	for _, pair := range defaultTagPairs {
+		tagPairs = append(tagPairs, pair)
+	}
+
 	// Track the last position we've processed
 	lastPos := 0
 
diff --git a/pkg/reasoning/reasoning_test.go b/pkg/reasoning/reasoning_test.go
index f66eca55e..290576b7e 100644
--- a/pkg/reasoning/reasoning_test.go
+++ b/pkg/reasoning/reasoning_test.go
@@ -12,21 +12,21 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has no reasoning tags", func() {
 		It("should return empty reasoning and original content", func() {
 			content := "This is regular content without any tags."
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(content))
 		})
 
 		It("should handle empty string", func() {
 			content := ""
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(BeEmpty())
 		})
 
 		It("should handle content with only whitespace", func() {
 			content := "   \n\t  "
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(content))
 		})
@@ -35,42 +35,42 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <thinking> tags", func() {
 		It("should extract reasoning from single thinking block", func() {
 			content := "Some text <thinking>This is my reasoning</thinking> More text"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("This is my reasoning"))
 			Expect(cleaned).To(Equal("Some text  More text"))
 		})
 
 		It("should extract reasoning and preserve surrounding content", func() {
 			content := "Before <thinking>Reasoning here</thinking> After"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Reasoning here"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle thinking block at the start", func() {
 			content := "<thinking>Start reasoning</thinking> Regular content"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Start reasoning"))
 			Expect(cleaned).To(Equal(" Regular content"))
 		})
 
 		It("should handle thinking block at the end", func() {
 			content := "Regular content <thinking>End reasoning</thinking>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("End reasoning"))
 			Expect(cleaned).To(Equal("Regular content "))
 		})
 
 		It("should handle only thinking block", func() {
 			content := "<thinking>Only reasoning</thinking>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Only reasoning"))
 			Expect(cleaned).To(BeEmpty())
 		})
 
 		It("should trim whitespace from reasoning content", func() {
 			content := "Text <thinking>  \n  Reasoning with spaces  \n  </thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Reasoning with spaces"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -79,21 +79,21 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <think> tags", func() {
 		It("should extract reasoning from redacted_reasoning block", func() {
 			content := "Text <think>Redacted reasoning</think> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Redacted reasoning"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should handle redacted_reasoning with multiline content", func() {
 			content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle redacted_reasoning with complex content", func() {
 			content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
 			Expect(cleaned).To(Equal("Start  End"))
 		})
@@ -102,14 +102,14 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has multiple reasoning blocks", func() {
 		It("should concatenate multiple thinking blocks with newlines", func() {
 			content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("First\n\nSecond"))
 			Expect(cleaned).To(Equal("Text  Middle  End"))
 		})
 
 		It("should handle multiple different tag types", func() {
 			content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(ContainSubstring("One"))
 			Expect(reasoning).To(ContainSubstring("Two"))
 			Expect(reasoning).To(ContainSubstring("Three"))
@@ -118,7 +118,7 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle nested tags correctly (extracts first match)", func() {
 			content := "Text <thinking>Outer <think>Inner</think></thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			// Should extract the outer thinking block
 			Expect(reasoning).To(ContainSubstring("Outer"))
 			Expect(reasoning).To(ContainSubstring("Inner"))
@@ -129,28 +129,28 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has unclosed reasoning tags", func() {
 		It("should extract unclosed thinking block", func() {
 			content := "Text <thinking>Unclosed reasoning"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Unclosed reasoning"))
 			Expect(cleaned).To(Equal("Text "))
 		})
 
 		It("should extract unclosed think block", func() {
 			content := "Before <think>Incomplete"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Incomplete"))
 			Expect(cleaned).To(Equal("Before "))
 		})
 
 		It("should extract unclosed redacted_reasoning block", func() {
 			content := "Start <think>Partial reasoning content"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Partial reasoning content"))
 			Expect(cleaned).To(Equal("Start "))
 		})
 
 		It("should handle unclosed tag at the end", func() {
 			content := "Regular content <thinking>Unclosed at end"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Unclosed at end"))
 			Expect(cleaned).To(Equal("Regular content "))
 		})
@@ -159,14 +159,14 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has empty reasoning blocks", func() {
 		It("should ignore empty thinking block", func() {
 			content := "Text <thinking></thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should ignore thinking block with only whitespace", func() {
 			content := "Text <thinking>   \n\t  </thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -175,28 +175,28 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has reasoning tags with special characters", func() {
 		It("should handle reasoning with newlines", func() {
 			content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle reasoning with code blocks", func() {
 			content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should handle reasoning with JSON", func() {
 			content := "Before <think>{\"key\": \"value\"}</think> After"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle reasoning with HTML-like content", func() {
 			content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -205,7 +205,7 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has reasoning mixed with regular content", func() {
 		It("should preserve content order correctly", func() {
 			content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(ContainSubstring("Reasoning"))
 			Expect(reasoning).To(ContainSubstring("More reasoning"))
 			Expect(cleaned).To(Equal("Start  Middle  End"))
@@ -213,7 +213,7 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle reasoning in the middle of a sentence", func() {
 			content := "This is a <thinking>reasoning</thinking> sentence."
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("reasoning"))
 			Expect(cleaned).To(Equal("This is a  sentence."))
 		})
@@ -222,21 +222,21 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("edge cases", func() {
 		It("should handle content with only opening tag", func() {
 			content := "<thinking>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(""))
 		})
 
 		It("should handle content with only closing tag", func() {
 			content := "</thinking>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal("</thinking>"))
 		})
 
 		It("should handle mismatched tags", func() {
 			content := "<thinking>Content</think>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			// Should extract unclosed thinking block
 			Expect(reasoning).To(ContainSubstring("Content"))
 			Expect(cleaned).To(Equal(""))
@@ -245,7 +245,7 @@ var _ = Describe("ExtractReasoning", func() {
 		It("should handle very long reasoning content", func() {
 			longReasoning := strings.Repeat("This is reasoning content. ", 100)
 			content := "Text <thinking>" + longReasoning + "</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			// TrimSpace is applied, so we need to account for that
 			Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
 			Expect(cleaned).To(Equal("Text  More"))
@@ -253,7 +253,7 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle reasoning with unicode characters", func() {
 			content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -262,14 +262,14 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <|START_THINKING|> tags (Command-R)", func() {
 		It("should extract reasoning from START_THINKING block", func() {
 			content := "Text <|START_THINKING|>Command-R reasoning<|END_THINKING|> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Command-R reasoning"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should handle unclosed START_THINKING block", func() {
 			content := "Before <|START_THINKING|>Incomplete reasoning"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Incomplete reasoning"))
 			Expect(cleaned).To(Equal("Before "))
 		})
@@ -278,7 +278,7 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <|inner_prefix|> tags (Apertus)", func() {
 		It("should extract reasoning from inner_prefix block", func() {
 			content := "Text <|inner_prefix|>Apertus reasoning<|inner_suffix|> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Apertus reasoning"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -287,7 +287,7 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <seed:think> tags (Seed)", func() {
 		It("should extract reasoning from seed:think block", func() {
 			content := "Text <seed:think>Seed reasoning</seed:think> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Seed reasoning"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -296,7 +296,7 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <|think|> tags (Solar Open)", func() {
 		It("should extract reasoning from Solar Open think block", func() {
 			content := "Text <|think|>Solar reasoning<|end|><|begin|>assistant<|content|> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Solar reasoning"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -305,14 +305,14 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has [THINK] tags (Magistral)", func() {
 		It("should extract reasoning from THINK block", func() {
 			content := "Text [THINK]Magistral reasoning[/THINK] More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Magistral reasoning"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should handle unclosed THINK block", func() {
 			content := "Before [THINK]Incomplete reasoning"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, nil)
 			Expect(reasoning).To(Equal("Incomplete reasoning"))
 			Expect(cleaned).To(Equal("Before "))
 		})
@@ -323,62 +323,62 @@ var _ = Describe("DetectThinkingStartToken", func() {
 	Context("when prompt contains thinking start tokens", func() {
 		It("should detect <|START_THINKING|> at the end", func() {
 			prompt := "Some prompt text <|START_THINKING|>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<|START_THINKING|>"))
 		})
 
 		It("should detect <think> at the end", func() {
 			prompt := "Prompt with <think>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<think>"))
 		})
 
 		It("should detect <thinking> at the end", func() {
 			prompt := "Some text <thinking>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<thinking>"))
 		})
 
 		It("should detect <|inner_prefix|> at the end", func() {
 			prompt := "Prompt <|inner_prefix|>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<|inner_prefix|>"))
 		})
 
 		It("should detect <seed:think> at the end", func() {
 			prompt := "Text <seed:think>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<seed:think>"))
 		})
 
 		It("should detect <|think|> at the end", func() {
 			prompt := "Prompt <|think|>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<|think|>"))
 		})
 
 		It("should detect [THINK] at the end", func() {
 			prompt := "Text [THINK]"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("[THINK]"))
 		})
 
 		It("should handle trailing whitespace", func() {
 			prompt := "Prompt <|START_THINKING|>   \n\t  "
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<|START_THINKING|>"))
 		})
 
 		It("should detect token near the end (within last 100 chars)", func() {
 			prefix := strings.Repeat("x", 50)
 			prompt := prefix + "<|START_THINKING|>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<|START_THINKING|>"))
 		})
 
 		It("should detect token when followed by only whitespace", func() {
 			prompt := "Text <think>   \n  "
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(Equal("<think>"))
 		})
 	})
@@ -386,27 +386,27 @@ var _ = Describe("DetectThinkingStartToken", func() {
 	Context("when prompt does not contain thinking tokens", func() {
 		It("should return empty string for regular prompt", func() {
 			prompt := "This is a regular prompt without thinking tokens"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(BeEmpty())
 		})
 
 		It("should return empty string for empty prompt", func() {
 			prompt := ""
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			Expect(token).To(BeEmpty())
 		})
 
 		It("should detect token even when far from end (Contains check)", func() {
 			prefix := strings.Repeat("x", 150)
 			prompt := prefix + "<|START_THINKING|>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			// Current implementation uses Contains, so it finds tokens anywhere
 			Expect(token).To(Equal("<|START_THINKING|>"))
 		})
 
 		It("should detect token even when followed by non-whitespace (Contains check)", func() {
 			prompt := "Text <|START_THINKING|>more text"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			// Current implementation uses Contains, so it finds tokens anywhere
 			Expect(token).To(Equal("<|START_THINKING|>"))
 		})
@@ -415,7 +415,7 @@ var _ = Describe("DetectThinkingStartToken", func() {
 	Context("when multiple tokens are present", func() {
 		It("should return the first matching token (most specific)", func() {
 			prompt := "Text <|START_THINKING|> <thinking>"
-			token := DetectThinkingStartToken(prompt)
+			token := DetectThinkingStartToken(prompt, nil)
 			// Should return the first one found (order matters)
 			Expect(token).To(Equal("<|START_THINKING|>"))
 		})
@@ -504,3 +504,641 @@ var _ = Describe("PrependThinkingTokenIfNeeded", func() {
 		})
 	})
 })
+
+var _ = Describe("ExtractReasoningWithConfig", func() {
+	Context("when reasoning is disabled", func() {
+		It("should return original content when DisableReasoning is true", func() {
+			content := "Some text <thinking>Reasoning</thinking> More text"
+			config := Config{DisableReasoning: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+
+		It("should return original content even with thinking start token when DisableReasoning is true", func() {
+			content := "Reasoning content"
+			config := Config{DisableReasoning: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<|START_THINKING|>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+
+		It("should return original content even with tag prefill disabled when DisableReasoning is true", func() {
+			content := "Some content"
+			config := Config{
+				DisableReasoning:           boolPtr(true),
+				DisableReasoningTagPrefill: boolPtr(false),
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+	})
+
+	Context("when reasoning is enabled (DisableReasoning is nil or false)", func() {
+		Context("when tag prefill is enabled (DisableReasoningTagPrefill is nil or false)", func() {
+			It("should prepend token and extract reasoning when both configs are nil", func() {
+				content := "Reasoning content"
+				config := Config{}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+				// Token is prepended, then extracted
+				Expect(reasoning).To(Equal("Reasoning content"))
+				Expect(cleaned).To(BeEmpty())
+			})
+
+			It("should prepend token and extract reasoning when DisableReasoning is false", func() {
+				content := "Some reasoning"
+				config := Config{DisableReasoning: boolPtr(false)}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<think>", config)
+				Expect(reasoning).To(Equal("Some reasoning"))
+				Expect(cleaned).To(BeEmpty())
+			})
+
+			It("should prepend token and extract reasoning when DisableReasoningTagPrefill is false", func() {
+				content := "My reasoning"
+				config := Config{DisableReasoningTagPrefill: boolPtr(false)}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<|START_THINKING|>", config)
+				Expect(reasoning).To(Equal("My reasoning"))
+				Expect(cleaned).To(BeEmpty())
+			})
+
+			It("should prepend token to content with existing tags and extract", func() {
+				content := "Before <thinking>Existing reasoning</thinking> After"
+				config := Config{}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+				// Should extract existing reasoning, token prepend doesn't affect already tagged content
+				Expect(reasoning).To(Equal("Existing reasoning"))
+				Expect(cleaned).To(Equal("Before  After"))
+			})
+
+			It("should prepend token and extract from content that becomes tagged", func() {
+				content := "Pure reasoning without tags"
+				config := Config{}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+				// Token is prepended, making it <thinking>Pure reasoning without tags
+				// Since there is no closing tag, the whole remainder is extracted as unclosed reasoning
+				Expect(reasoning).To(Equal("Pure reasoning without tags"))
+				Expect(cleaned).To(BeEmpty())
+			})
+
+			It("should handle empty token when tag prefill is enabled", func() {
+				content := "Some content <thinking>Reasoning</thinking> More"
+				config := Config{}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "", config)
+				// No token to prepend, just extract existing reasoning
+				Expect(reasoning).To(Equal("Reasoning"))
+				Expect(cleaned).To(Equal("Some content  More"))
+			})
+
+			It("should prepend token after leading whitespace", func() {
+				content := "   \n  Reasoning content"
+				config := Config{}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+				Expect(reasoning).To(Equal("Reasoning content"))
+				Expect(cleaned).To(Equal("   \n  "))
+			})
+		})
+
+		Context("when tag prefill is disabled (DisableReasoningTagPrefill is true)", func() {
+			It("should extract reasoning without prepending token when DisableReasoningTagPrefill is true", func() {
+				content := "Some text <thinking>Reasoning</thinking> More text"
+				config := Config{DisableReasoningTagPrefill: boolPtr(true)}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+				Expect(reasoning).To(Equal("Reasoning"))
+				Expect(cleaned).To(Equal("Some text  More text"))
+			})
+
+			It("should not prepend token to content without tags when DisableReasoningTagPrefill is true", func() {
+				content := "Pure content without tags"
+				config := Config{DisableReasoningTagPrefill: boolPtr(true)}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+				// No token prepended, no tags to extract
+				Expect(reasoning).To(BeEmpty())
+				Expect(cleaned).To(Equal(content))
+			})
+
+			It("should extract multiple reasoning blocks without prepending when DisableReasoningTagPrefill is true", func() {
+				content := "A <thinking>First</thinking> B <think>Second</think> C"
+				config := Config{DisableReasoningTagPrefill: boolPtr(true)}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+				Expect(reasoning).To(ContainSubstring("First"))
+				Expect(reasoning).To(ContainSubstring("Second"))
+				Expect(cleaned).To(Equal("A  B  C"))
+			})
+
+			It("should handle empty token when tag prefill is disabled", func() {
+				content := "Text <thinking>Reasoning</thinking> More"
+				config := Config{DisableReasoningTagPrefill: boolPtr(true)}
+				reasoning, cleaned := ExtractReasoningWithConfig(content, "", config)
+				Expect(reasoning).To(Equal("Reasoning"))
+				Expect(cleaned).To(Equal("Text  More"))
+			})
+		})
+	})
+
+	Context("edge cases", func() {
+		It("should handle empty content with default config", func() {
+			content := ""
+			config := Config{}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should handle empty content when reasoning is disabled", func() {
+			content := ""
+			config := Config{DisableReasoning: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should handle empty token with content containing tags", func() {
+			content := "Text <thinking>Reasoning</thinking> More"
+			config := Config{}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "", config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle content with only whitespace when reasoning is enabled", func() {
+			content := "   \n\t  "
+			config := Config{}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			// Token is prepended after whitespace, then extracted as unclosed
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("   \n\t  "))
+		})
+
+		It("should handle content with only whitespace when reasoning is disabled", func() {
+			content := "   \n\t  "
+			config := Config{DisableReasoning: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+
+		It("should handle unclosed reasoning tags with tag prefill enabled", func() {
+			content := "Some text <thinking>Unclosed"
+			config := Config{}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(Equal("Unclosed"))
+			Expect(cleaned).To(Equal("Some text "))
+		})
+
+		It("should handle different token types with config", func() {
+			content := "Reasoning content"
+			config := Config{}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<|START_THINKING|>", config)
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should handle content that already contains the token", func() {
+			content := "<thinking>Already tagged</thinking>"
+			config := Config{}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			// Token already present, should not prepend, just extract
+			Expect(reasoning).To(Equal("Already tagged"))
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should handle complex reasoning with multiline content and tag prefill", func() {
+			content := "Before\n<thinking>Line 1\nLine 2\nLine 3</thinking>\nAfter"
+			config := Config{}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
+			Expect(cleaned).To(Equal("Before\n\nAfter"))
+		})
+	})
+
+	Context("config combinations", func() {
+		It("should handle nil DisableReasoning and nil DisableReasoningTagPrefill", func() {
+			content := "Reasoning"
+			config := Config{}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should handle false DisableReasoning and true DisableReasoningTagPrefill", func() {
+			content := "Text <thinking>Reasoning</thinking> More"
+			config := Config{
+				DisableReasoning:           boolPtr(false),
+				DisableReasoningTagPrefill: boolPtr(true),
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle true DisableReasoning regardless of DisableReasoningTagPrefill", func() {
+			content := "Some content <thinking>Reasoning</thinking>"
+			config := Config{
+				DisableReasoning:           boolPtr(true),
+				DisableReasoningTagPrefill: boolPtr(false),
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+	})
+
+	Context("when StripReasoningOnly is enabled", func() {
+		It("should strip reasoning but keep cleaned content when StripReasoningOnly is true", func() {
+			content := "Some text <thinking>Reasoning content</thinking> More text"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Some text  More text"))
+		})
+
+		It("should strip reasoning from multiple blocks when StripReasoningOnly is true", func() {
+			content := "A <thinking>First</thinking> B <thinking>Second</thinking> C"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("A  B  C"))
+		})
+
+		It("should strip reasoning from different tag types when StripReasoningOnly is true", func() {
+			content := "Before <thinking>One</thinking> Middle <think>Two</think> After"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Before  Middle  After"))
+		})
+
+		It("should strip reasoning but preserve content when StripReasoningOnly is true", func() {
+			content := "Regular content <thinking>Reasoning</thinking>"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Regular content "))
+		})
+
+		It("should strip reasoning from unclosed tags when StripReasoningOnly is true", func() {
+			content := "Text <thinking>Unclosed reasoning"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Text "))
+		})
+
+		It("should strip reasoning from Command-R tags when StripReasoningOnly is true", func() {
+			content := "Before <|START_THINKING|>Command-R reasoning<|END_THINKING|> After"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<|START_THINKING|>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Before  After"))
+		})
+
+		It("should strip reasoning from Apertus tags when StripReasoningOnly is true", func() {
+			content := "Text <|inner_prefix|>Apertus reasoning<|inner_suffix|> More"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<|inner_prefix|>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should strip reasoning from Seed tags when StripReasoningOnly is true", func() {
+			content := "Before <seed:think>Seed reasoning</seed:think> After"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<seed:think>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Before  After"))
+		})
+
+		It("should strip reasoning from Magistral tags when StripReasoningOnly is true", func() {
+			content := "Text [THINK]Magistral reasoning[/THINK] More"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "[THINK]", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should strip reasoning with multiline content when StripReasoningOnly is true", func() {
+			content := "Start <thinking>Line 1\nLine 2\nLine 3</thinking> End"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Start  End"))
+		})
+
+		It("should handle content with only reasoning tags when StripReasoningOnly is true", func() {
+			content := "<thinking>Only reasoning</thinking>"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should handle empty reasoning blocks when StripReasoningOnly is true", func() {
+			content := "Text <thinking></thinking> More"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle content without reasoning tags when StripReasoningOnly is true", func() {
+			content := "Regular content without tags"
+			config := Config{
+				StripReasoningOnly:         boolPtr(true),
+				DisableReasoningTagPrefill: boolPtr(true),
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+
+		It("should handle content without reasoning tags when StripReasoningOnly is true", func() {
+			content := "Regular content without tags"
+			config := Config{
+				StripReasoningOnly: boolPtr(true),
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+
+		It("should handle content without reasoning tags when StripReasoningOnly is true", func() {
+			content := "Regular content without tags"
+			config := Config{
+				StripReasoningOnly: boolPtr(true),
+			}
+			thinkingStartToken := DetectThinkingStartToken("template_without_thinking_tag", &config)
+			reasoning, cleaned := ExtractReasoningWithConfig(content, thinkingStartToken, config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+
+		It("should handle content without reasoning tags when StripReasoningOnly is true", func() {
+			content := "foo</think>Regular content without tags"
+			config := Config{}
+			thinkingStartToken := DetectThinkingStartToken("<think>", &config)
+			reasoning, cleaned := ExtractReasoningWithConfig(content, thinkingStartToken, config)
+			Expect(reasoning).To(Equal("foo"))
+			Expect(cleaned).To(Equal("Regular content without tags"))
+		})
+
+		It("should strip reasoning when StripReasoningOnly is true and tag prefill is enabled", func() {
+			content := "Reasoning content"
+			config := Config{
+				StripReasoningOnly:         boolPtr(true),
+				DisableReasoningTagPrefill: boolPtr(false),
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should strip reasoning when StripReasoningOnly is true and tag prefill is disabled", func() {
+			content := "Text <thinking>Reasoning</thinking> More"
+			config := Config{
+				StripReasoningOnly:         boolPtr(true),
+				DisableReasoningTagPrefill: boolPtr(true),
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should not strip reasoning when StripReasoningOnly is false", func() {
+			content := "Text <thinking>Reasoning</thinking> More"
+			config := Config{StripReasoningOnly: boolPtr(false)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should not strip reasoning when StripReasoningOnly is nil", func() {
+			content := "Text <thinking>Reasoning</thinking> More"
+			config := Config{StripReasoningOnly: nil}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should strip reasoning but not affect DisableReasoning behavior", func() {
+			content := "Text <thinking>Reasoning</thinking> More"
+			config := Config{
+				DisableReasoning:   boolPtr(true),
+				StripReasoningOnly: boolPtr(true),
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			// When DisableReasoning is true, reasoning extraction doesn't happen at all
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal(content))
+		})
+
+		It("should handle complex content with reasoning and regular text when StripReasoningOnly is true", func() {
+			content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Start  Middle  End"))
+		})
+
+		It("should handle reasoning with special characters when StripReasoningOnly is true", func() {
+			content := "Before <thinking>Reasoning with ```code``` and {\"json\": true}</thinking> After"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Before  After"))
+		})
+
+		It("should handle reasoning with unicode when StripReasoningOnly is true", func() {
+			content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
+			config := Config{StripReasoningOnly: boolPtr(true)}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "<thinking>", config)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+	})
+})
+
+var _ = Describe("Custom Thinking Start Tokens", func() {
+	Context("when custom thinking start tokens are provided", func() {
+		It("should detect custom thinking start token", func() {
+			prompt := "Some prompt <custom:think>"
+			config := &Config{ThinkingStartTokens: []string{"<custom:think>"}}
+			token := DetectThinkingStartToken(prompt, config)
+			Expect(token).To(Equal("<custom:think>"))
+		})
+
+		It("should prioritize custom tokens over default tokens", func() {
+			prompt := "Text <thinking> <custom:think>"
+			config := &Config{ThinkingStartTokens: []string{"<custom:think>"}}
+			token := DetectThinkingStartToken(prompt, config)
+			// Custom token should win even though a default token appears earlier in the prompt
+			Expect(token).To(Equal("<custom:think>"))
+		})
+
+		It("should detect multiple custom tokens (first match)", func() {
+			prompt := "Prompt <token1> <token2>"
+			config := &Config{ThinkingStartTokens: []string{"<token1>", "<token2>"}}
+			token := DetectThinkingStartToken(prompt, config)
+			Expect(token).To(Equal("<token1>"))
+		})
+
+		It("should fall back to default tokens if custom tokens not found", func() {
+			prompt := "Text <thinking>"
+			config := &Config{ThinkingStartTokens: []string{"<custom:think>"}}
+			token := DetectThinkingStartToken(prompt, config)
+			Expect(token).To(Equal("<thinking>"))
+		})
+
+		It("should handle empty custom tokens list", func() {
+			prompt := "Text <thinking>"
+			config := &Config{ThinkingStartTokens: []string{}}
+			token := DetectThinkingStartToken(prompt, config)
+			Expect(token).To(Equal("<thinking>"))
+		})
+
+		It("should handle nil config (use defaults only)", func() {
+			prompt := "Text <thinking>"
+			token := DetectThinkingStartToken(prompt, nil)
+			Expect(token).To(Equal("<thinking>"))
+		})
+	})
+})
+
+var _ = Describe("Custom Tag Pairs", func() {
+	Context("when custom tag pairs are provided", func() {
+		It("should extract reasoning from custom tag pair", func() {
+			content := "Text <custom:think>Custom reasoning</custom:think> More"
+			config := &Config{TagPairs: []TagPair{{Start: "<custom:think>", End: "</custom:think>"}}}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(Equal("Custom reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should prioritize custom tag pairs over default pairs", func() {
+			content := "Text <custom:think>Custom</custom:think> <thinking>Default</thinking> More"
+			config := &Config{TagPairs: []TagPair{{Start: "<custom:think>", End: "</custom:think>"}}}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			// Should extract both, but custom comes first
+			Expect(reasoning).To(ContainSubstring("Custom"))
+			Expect(reasoning).To(ContainSubstring("Default"))
+			Expect(cleaned).To(Equal("Text   More"))
+		})
+
+		It("should handle multiple custom tag pairs", func() {
+			content := "A <tag1>First</tag1> B <tag2>Second</tag2> C"
+			config := &Config{
+				TagPairs: []TagPair{
+					{Start: "<tag1>", End: "</tag1>"},
+					{Start: "<tag2>", End: "</tag2>"},
+				},
+			}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(ContainSubstring("First"))
+			Expect(reasoning).To(ContainSubstring("Second"))
+			Expect(cleaned).To(Equal("A  B  C"))
+		})
+
+		It("should handle custom tag pairs with complex end tags", func() {
+			content := "Text <start>Reasoning<end><begin>assistant</begin> More"
+			config := &Config{TagPairs: []TagPair{{Start: "<start>", End: "<end><begin>assistant</begin>"}}}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle unclosed custom tag pairs", func() {
+			content := "Text <custom:think>Unclosed reasoning"
+			config := &Config{TagPairs: []TagPair{{Start: "<custom:think>", End: "</custom:think>"}}}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(Equal("Unclosed reasoning"))
+			Expect(cleaned).To(Equal("Text "))
+		})
+
+		It("should ignore invalid tag pairs (empty start or end)", func() {
+			content := "Text <valid>Content</valid> More"
+			config := &Config{
+				TagPairs: []TagPair{
+					{Start: "", End: "</invalid>"},      // Invalid: empty start
+					{Start: "<invalid>", End: ""},       // Invalid: empty end
+					{Start: "<valid>", End: "</valid>"}, // Valid
+				},
+			}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(Equal("Content"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should fall back to default tag pairs if custom pairs not found", func() {
+			content := "Text <thinking>Default reasoning</thinking> More"
+			config := &Config{TagPairs: []TagPair{{Start: "<custom:think>", End: "</custom:think>"}}}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(Equal("Default reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle empty custom tag pairs list", func() {
+			content := "Text <thinking>Reasoning</thinking> More"
+			config := &Config{TagPairs: []TagPair{}}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle nil config (use defaults only)", func() {
+			content := "Text <thinking>Reasoning</thinking> More"
+			reasoning, cleaned := ExtractReasoning(content, nil)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle custom tag pairs with special characters", func() {
+			content := "Text <[think]>Reasoning</[think]> More"
+			config := &Config{TagPairs: []TagPair{{Start: "<[think]>", End: "</[think]>"}}}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+
+		It("should handle custom tag pairs with multiline content", func() {
+			content := "Before <custom>Line 1\nLine 2\nLine 3</custom> After"
+			config := &Config{TagPairs: []TagPair{{Start: "<custom>", End: "</custom>"}}}
+			reasoning, cleaned := ExtractReasoning(content, config)
+			Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
+			Expect(cleaned).To(Equal("Before  After"))
+		})
+	})
+})
+
+var _ = Describe("Custom Tokens and Tag Pairs Integration", func() {
+	Context("when both custom tokens and tag pairs are provided", func() {
+		It("should use custom thinking start token and custom tag pair together", func() {
+			content := "Reasoning content"
+			config := Config{
+				ThinkingStartTokens: []string{"<custom:think>"},
+				TagPairs:            []TagPair{{Start: "<custom:think>", End: "</custom:think>"}},
+			}
+			// First detect the token
+			token := DetectThinkingStartToken("Prompt <custom:think>", &config)
+			Expect(token).To(Equal("<custom:think>"))
+			// Then extract with the custom tag pair
+			reasoning, cleaned := ExtractReasoningWithConfig(content, token, config)
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should work with ExtractReasoningWithConfig and custom config", func() {
+			content := "Text <custom:think>Reasoning</custom:think> More"
+			config := Config{
+				TagPairs: []TagPair{{Start: "<custom:think>", End: "</custom:think>"}},
+			}
+			reasoning, cleaned := ExtractReasoningWithConfig(content, "", config)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("Text  More"))
+		})
+	})
+})
+
+// Helper function to create bool pointers for test configs
+func boolPtr(b bool) *bool {
+	return &b
+}