diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 48e86d42e..5b9b5ed13 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -103,7 +103,12 @@ func applyAutoparserOverride(
 	// blocks like "<think></think>" that some models emit when reasoning
 	// is disabled.
 	if deltaReasoning == "" && deltaContent != "" {
-		deltaReasoning, deltaContent = reason.ExtractReasoningWithConfig(deltaContent, thinkingStartToken, reasoningConfig)
+		// Complete-response extraction: only honor a prefilled <think> start
+		// token when deltaContent actually closes the reasoning block. Without
+		// it the model answered directly and the whole answer must stay in
+		// content rather than be swallowed as unclosed reasoning. See
+		// reason.ExtractReasoningComplete.
+		deltaReasoning, deltaContent = reason.ExtractReasoningComplete(deltaContent, thinkingStartToken, reasoningConfig)
 	}
 	xlog.Debug("[ChatDeltas] non-SSE no-tools: overriding result with C++ autoparser deltas",
 		"content_len", len(deltaContent), "reasoning_len", len(deltaReasoning))
diff --git a/core/http/endpoints/openai/chat_test.go b/core/http/endpoints/openai/chat_test.go
index ccdbe6850..f5aa35690 100644
--- a/core/http/endpoints/openai/chat_test.go
+++ b/core/http/endpoints/openai/chat_test.go
@@ -186,6 +186,86 @@ var _ = Describe("applyAutoparserOverride", func() {
 			Expect(result).To(Equal(existing))
 		})
 	})
+
+	// Regression tests for the prefilled-thinking-token path (thinkingStartToken
+	// != ""). This is the configuration the gallery qwen3 family runs in: the
+	// chat template injects <think> into the prompt, so DetectThinkingStartToken
+	// returns "<think>" and the model's output begins *inside* a reasoning block
+	// — it emits a closing </think> but no opening tag.
+	//
+	// The defensive Go-side fallback prepends the start token so the standard
+	// extractor can pair it with the model's </think>. But on a *complete*
+	// response that contains NO closing tag (the model answered directly with no
+	// reasoning at all), prepending <think> manufactures an unclosed block that
+	// swallows the entire answer into reasoning, leaving content empty. That is
+	// the bug: short/direct answers (session names, JSON summaries) come back
+	// with an empty content field.
+	Context("autoparser delivered content with empty reasoning and a prefilled thinking token", func() {
+		const prefill = "<think>"
+
+		It("keeps a tag-less direct answer as content instead of swallowing it as reasoning", func() {
+			// Direct answer: the output has neither an opening nor a closing tag.
+			deltas := []*pb.ChatDelta{
+				{Content: "hello", ReasoningContent: ""},
+			}
+
+			choices := applyAutoparserOverride(deltas, prefill, reason.Config{}, nil)
+
+			Expect(choices).To(HaveLen(1))
+			Expect(choices[0].Message.Content).ToNot(BeNil())
+			Expect(*(choices[0].Message.Content.(*string))).To(Equal("hello"),
+				"a complete answer with no closing reasoning tag must stay in content")
+			Expect(choices[0].Message.Reasoning).To(BeNil(),
+				"no reasoning block was emitted, so Reasoning must not be set")
+		})
+
+		It("keeps a tag-less JSON answer as content (the summary case)", func() {
+			output := `{"short":"Tests pass","long":"go test ./... succeeded."}`
+			deltas := []*pb.ChatDelta{
+				{Content: output, ReasoningContent: ""},
+			}
+
+			choices := applyAutoparserOverride(deltas, prefill, reason.Config{}, nil)
+
+			Expect(choices).To(HaveLen(1))
+			Expect(*(choices[0].Message.Content.(*string))).To(Equal(output))
+			Expect(choices[0].Message.Reasoning).To(BeNil())
+		})
+
+		It("still splits reasoning when the model emits the closing tag (prefill paired with </think>)", func() {
+			// Genuine prefill: the opening tag lived in the prompt, so only the
+			// closing tag shows up in the output. Its presence proves a
+			// reasoning block exists, so the split must still happen.
+			output := "The user wants a greeting.\n</think>\n\nHello there!"
+			deltas := []*pb.ChatDelta{
+				{Content: output, ReasoningContent: ""},
+			}
+
+			choices := applyAutoparserOverride(deltas, prefill, reason.Config{}, nil)
+
+			Expect(choices).To(HaveLen(1))
+			visible := *(choices[0].Message.Content.(*string))
+			Expect(visible).To(ContainSubstring("Hello there!"))
+			Expect(visible).ToNot(ContainSubstring("</think>"))
+			Expect(visible).ToNot(ContainSubstring("The user wants a greeting"))
+			Expect(choices[0].Message.Reasoning).ToNot(BeNil())
+			Expect(*choices[0].Message.Reasoning).To(ContainSubstring("The user wants a greeting"))
+		})
+
+		It("still splits a fully-tagged <think>…</think> block with a prefill token set", func() {
+			output := "<think>Reasoning here.</think>Final answer."
+			deltas := []*pb.ChatDelta{
+				{Content: output, ReasoningContent: ""},
+			}
+
+			choices := applyAutoparserOverride(deltas, prefill, reason.Config{}, nil)
+
+			Expect(choices).To(HaveLen(1))
+			Expect(*(choices[0].Message.Content.(*string))).To(Equal("Final answer."))
+			Expect(choices[0].Message.Reasoning).ToNot(BeNil())
+			Expect(*choices[0].Message.Reasoning).To(ContainSubstring("Reasoning here"))
+		})
+	})
 })
 
 var _ = Describe("mergeToolCallDeltas", func() {
diff --git a/core/http/endpoints/openai/realtime.go b/core/http/endpoints/openai/realtime.go
index 0d638a909..9bd40679c 100644
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
@@ -1579,7 +1579,7 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
-		// ExtractReasoningWithConfig is a no-op when no tag pair matches,
+		// ExtractReasoningComplete is a no-op when no tag pair matches,
 		// so it's safe to apply unconditionally in the no-reasoning branch.
 		if deltaReasoning == "" && deltaContent != "" {
-			deltaReasoning, deltaContent = reasoning.ExtractReasoningWithConfig(deltaContent, thinkingStartToken, config.ReasoningConfig)
+			deltaReasoning, deltaContent = reasoning.ExtractReasoningComplete(deltaContent, thinkingStartToken, config.ReasoningConfig)
 		}
 		reasoningText = deltaReasoning
 		responseWithoutReasoning = deltaContent
@@ -1587,7 +1587,7 @@ func triggerResponseAtTurn(ctx context.Context, session *Session, conv *Conversa
 		cleanedResponse = deltaContent
 		toolCalls = deltaToolCalls
 	} else {
-		reasoningText, responseWithoutReasoning = reasoning.ExtractReasoningWithConfig(rawResponse, thinkingStartToken, config.ReasoningConfig)
+		reasoningText, responseWithoutReasoning = reasoning.ExtractReasoningComplete(rawResponse, thinkingStartToken, config.ReasoningConfig)
 		textContent = functions.ParseTextContent(responseWithoutReasoning, config.FunctionsConfig)
 		cleanedResponse = functions.CleanupLLMResult(responseWithoutReasoning, config.FunctionsConfig)
 		toolCalls = functions.ParseFunctionCall(cleanedResponse, config.FunctionsConfig)
diff --git a/core/http/endpoints/openresponses/responses.go b/core/http/endpoints/openresponses/responses.go
index 2b986cc61..916380d01 100644
--- a/core/http/endpoints/openresponses/responses.go
+++ b/core/http/endpoints/openresponses/responses.go
@@ -1356,7 +1356,7 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i
 	thinkingStartToken := reason.DetectThinkingStartToken(template, &cfg.ReasoningConfig)
 
 	// Extract reasoning from result before cleaning
-	reasoningContent, cleanedResult := reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)
+	reasoningContent, cleanedResult := reason.ExtractReasoningComplete(result, thinkingStartToken, cfg.ReasoningConfig)
 
 	// Parse tool calls if using functions
 	var outputItems []schema.ORItemField
@@ -1996,7 +1996,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 				finalCleanedResult = extractor.CleanedContent()
 			}
 			if finalReasoning == "" && finalCleanedResult == "" {
-				finalReasoning, finalCleanedResult = reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)
+				finalReasoning, finalCleanedResult = reason.ExtractReasoningComplete(result, thinkingStartToken, cfg.ReasoningConfig)
 			}
 
 			// Close reasoning item if it exists and wasn't closed yet
@@ -2493,7 +2493,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
 		finalCleanedResult = extractor.CleanedContent()
 	}
 	if finalReasoning == "" && finalCleanedResult == "" {
-		finalReasoning, finalCleanedResult = reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig)
+		finalReasoning, finalCleanedResult = reason.ExtractReasoningComplete(result, thinkingStartToken, cfg.ReasoningConfig)
 	}
 
 	// Close reasoning item if it exists and wasn't closed yet
diff --git a/pkg/reasoning/reasoning.go b/pkg/reasoning/reasoning.go
index e9920af5d..108276c3d 100644
--- a/pkg/reasoning/reasoning.go
+++ b/pkg/reasoning/reasoning.go
@@ -89,6 +89,35 @@ func ExtractReasoningWithConfig(content, thinkingStartToken string, config Confi
 	return reasoning, cleanedContent
 }
 
+// ExtractReasoningComplete splits a finished (non-streaming) model response
+// into its reasoning and its visible content. It delegates to
+// ExtractReasoningWithConfig, but forwards thinkingStartToken only when content
+// contains the closing tag that ClosingTokenForStart pairs with that token;
+// otherwise it forwards an empty start token.
+//
+// Why: a chat template may place the start token in the prompt (in which case
+// DetectThinkingStartToken reports it, e.g. "<think>"), so the model's output
+// starts inside a reasoning block and carries only the closing tag. Prepending
+// the start token lets the extractor pair it with that closing tag. A finished
+// response without the closing tag is a direct answer with no reasoning:
+// prepending the start token there would fabricate an unclosed block and move
+// the whole answer into reasoning, leaving content empty (this broke short,
+// direct answers such as session names or JSON summaries). Reasoning tags that
+// are literally present in content are still extracted, since only the
+// synthetic prefill is dropped.
+//
+// Streaming callers must keep using ExtractReasoningWithConfig: mid-stream, a
+// block that is not closed yet is legitimate and its tokens should surface as
+// reasoning deltas as they arrive.
+func ExtractReasoningComplete(content, thinkingStartToken string, config Config) (reasoning string, cleanedContent string) {
+	closeTag := ClosingTokenForStart(thinkingStartToken, &config)
+	if closeTag == "" || !strings.Contains(content, closeTag) {
+		// No provable reasoning block: extract without a synthetic prefill.
+		return ExtractReasoningWithConfig(content, "", config)
+	}
+	return ExtractReasoningWithConfig(content, thinkingStartToken, config)
+}
+
 // PrependThinkingTokenIfNeeded prepends the thinking start token to content if it was
 // detected in the prompt. This allows the standard extraction logic to work correctly
 // for models where the thinking token is already in the prompt.
@@ -131,6 +160,48 @@ func PrependThinkingTokenIfNeeded(content string, startToken string) string {
 	return startToken + content
 }
 
+// defaultReasoningTagPairs holds the built-in start/end reasoning tag pairs,
+// matching llama.cpp's chat-parser.cpp. ExtractReasoning appends them after any
+// custom pairs and ClosingTokenForStart scans them in order, so both agree.
+var defaultReasoningTagPairs = []TagPair{
+	{Start: "<|START_THINKING|>", End: "<|END_THINKING|>"},            // Command-R models
+	{Start: "<|inner_prefix|>", End: "<|inner_suffix|>"},              // Apertus models
+	{Start: "<seed:think>", End: "</seed:think>"},                     // Seed models
+	{Start: "<think>", End: "</think>"},                               // DeepSeek, Granite, ExaOne models
+	{Start: "<|think|>", End: "<|end|><|begin|>assistant<|content|>"}, // Solar Open models (complex end)
+	{Start: "<|channel>thought", End: "<channel|>"},                   // Gemma 4 models
+	{Start: "<thinking>", End: "</thinking>"},                         // General thinking tag
+	{Start: "[THINK]", End: "[/THINK]"},                               // Magistral models
+}
+
+// ClosingTokenForStart returns the closing reasoning tag that pairs with the
+// given start token, searching custom config TagPairs first then the built-in
+// defaults. Returns "" when startToken is empty or unrecognized. A custom pair
+// with an empty End is skipped so it cannot shadow a usable pair further on.
+//
+// Used by the non-streaming autoparser fallback to decide whether a complete
+// response that began with a prefilled thinking token actually closed its
+// reasoning block: only then is synthesizing the start token safe. A complete
+// response with no closing tag is a direct answer, not unclosed reasoning.
+func ClosingTokenForStart(startToken string, config *Config) string {
+	if startToken == "" {
+		return ""
+	}
+	if config != nil {
+		for _, pair := range config.TagPairs {
+			if pair.Start == startToken && pair.End != "" {
+				return pair.End
+			}
+		}
+	}
+	for _, pair := range defaultReasoningTagPairs {
+		if pair.Start == startToken {
+			return pair.End
+		}
+	}
+	return ""
+}
+
 // ExtractReasoning extracts reasoning content from thinking tags and returns
 // both the extracted reasoning and the cleaned content (with tags removed).
 // It handles <thinking>...</thinking> and <think>...</think> tags.
@@ -145,22 +216,7 @@ func ExtractReasoning(content string, config *Config) (reasoning string, cleaned
 	var cleanedParts []string
 	remaining := content
 
-	// Define default tag pairs to look for (matching llama.cpp's chat-parser.cpp)
-	defaultTagPairs := []struct {
-		start string
-		end   string
-	}{
-		{"<|START_THINKING|>", "<|END_THINKING|>"},            // Command-R models
-		{"<|inner_prefix|>", "<|inner_suffix|>"},              // Apertus models
-		{"<seed:think>", "</seed:think>"},                     // Seed models
-		{"<think>", "</think>"},                               // DeepSeek, Granite, ExaOne models
-		{"<|think|>", "<|end|><|begin|>assistant<|content|>"}, // Solar Open models (complex end)
-		{"<|channel>thought", "<channel|>"},                    // Gemma 4 models
-		{"<thinking>", "</thinking>"},                         // General thinking tag
-		{"[THINK]", "[/THINK]"},                               // Magistral models
-	}
-
-	// Merge custom tag pairs with default tag pairs (custom pairs first for priority)
+	// Merge custom tag pairs (highest priority) with the built-in defaults.
 	var tagPairs []struct {
 		start string
 		end   string
@@ -175,9 +231,11 @@ func ExtractReasoning(content string, config *Config) (reasoning string, cleaned
 			}
 		}
 	}
-	// Add default tag pairs
-	for _, pair := range defaultTagPairs {
-		tagPairs = append(tagPairs, pair)
+	for _, pair := range defaultReasoningTagPairs {
+		tagPairs = append(tagPairs, struct {
+			start string
+			end   string
+		}{pair.Start, pair.End})
 	}
 
 	// Track the last position we've processed
diff --git a/pkg/reasoning/reasoning_test.go b/pkg/reasoning/reasoning_test.go
index 9f3675ff6..5e6151b01 100644
--- a/pkg/reasoning/reasoning_test.go
+++ b/pkg/reasoning/reasoning_test.go
@@ -1175,6 +1175,55 @@ var _ = Describe("Custom Tokens and Tag Pairs Integration", func() {
 	})
 })
 
+var _ = Describe("ClosingTokenForStart", func() {
+	It("returns the default closing tag for a known start token", func() {
+		for start, want := range map[string]string{"<think>": "</think>", "<thinking>": "</thinking>", "[THINK]": "[/THINK]"} {
+			Expect(ClosingTokenForStart(start, nil)).To(Equal(want))
+		}
+	})
+
+	It("returns empty for an empty or unknown start token", func() {
+		for _, start := range []string{"", "<nope>"} {
+			Expect(ClosingTokenForStart(start, nil)).To(BeEmpty())
+		}
+	})
+
+	It("prefers custom config tag pairs over the defaults", func() {
+		Expect(ClosingTokenForStart("<think>", &Config{TagPairs: []TagPair{{Start: "<think>", End: "<<END>>"}}})).To(Equal("<<END>>"))
+	})
+})
+
+var _ = Describe("ExtractReasoningComplete", func() {
+	const prefill = "<think>"
+
+	It("keeps a tag-less answer as content when a start token is prefilled but no close tag is present", func() {
+		// Regression guard: the prompt prefilled <think> but the model replied
+		// directly, so the synthetic prefill must not turn the reply into reasoning.
+		thought, answer := ExtractReasoningComplete("hello", prefill, Config{})
+		Expect(thought).To(BeEmpty())
+		Expect(answer).To(Equal("hello"))
+	})
+
+	It("extracts reasoning when the model emits only the closing tag (legitimate prefill)", func() {
+		thought, answer := ExtractReasoningComplete("the rationale\n</think>\n\nthe answer", prefill, Config{})
+		Expect(thought).To(ContainSubstring("the rationale"))
+		Expect(answer).To(ContainSubstring("the answer"))
+		Expect(answer).ToNot(ContainSubstring("</think>"))
+	})
+
+	It("extracts a fully-tagged block regardless of the prefill token", func() {
+		thought, answer := ExtractReasoningComplete("<think>r</think>answer", prefill, Config{})
+		Expect(thought).To(Equal("r"))
+		Expect(answer).To(Equal("answer"))
+	})
+
+	It("behaves like ExtractReasoningWithConfig when no start token is prefilled", func() {
+		thought, answer := ExtractReasoningComplete("<think>r</think>answer", "", Config{})
+		Expect(thought).To(Equal("r"))
+		Expect(answer).To(Equal("answer"))
+	})
+})
+
 // Helper function to create bool pointers for test configs
 func boolPtr(b bool) *bool {
 	return &b