diff --git a/core/http/endpoints/openai/chat_stream_reasoning_test.go b/core/http/endpoints/openai/chat_stream_reasoning_test.go
new file mode 100644
index 000000000..8ee7219ac
--- /dev/null
+++ b/core/http/endpoints/openai/chat_stream_reasoning_test.go
@@ -0,0 +1,99 @@
+package openai
+
+import (
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	reason "github.com/mudler/LocalAI/pkg/reasoning"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// Regression test for the prefill-misclassification artifact surfaced in
+// the review of #9991: when LocalAI templates qwen3 with
+// use_tokenizer_template (the post-#9985 gallery shape),
+// DetectThinkingStartToken finds <think> in the model's jinja chat
+// template — without evaluating the surrounding {% if enable_thinking %}
+// guard — and the Go-side extractor's PrependThinkingTokenIfNeeded then
+// treats every non-thinking output token as reasoning. The autoparser does
+// not classify qwen3's tool calls into ChatDelta.ToolCalls (qwen3's tool
+// format isn't on llama.cpp's recognized-tool list), so all tokens land in
+// ChatDelta.Content while the Go-side extractor silently accumulates a
+// "reasoning" string equal to the raw tool-call JSON. At end-of-stream
+// that string is flushed to the client as a trailing `delta.reasoning` chunk.
+//
+// chooseDeferredReasoning is the gate: when the autoparser was active for
+// any chunk (preferAutoparser sticky), we trust its reasoning_content
+// classification (usually empty) instead of the polluted Go-side state.
+var _ = Describe("chooseDeferredReasoning", func() {
+	// newPollutedExtractor reproduces the qwen3-after-#9985 misclassification.
+	// It builds a real extractor whose thinking-start token is <think> and
+	// pushes the given content through it; callers pass content without a
+	// <think>...</think> block. The helper asserts that the extractor then
+	// reports that entire content as reasoning before returning it.
+	newPollutedExtractor := func(raw string) *reason.ReasoningExtractor {
+		polluted := reason.NewReasoningExtractor("<think>", reason.Config{})
+		polluted.ProcessToken(raw)
+		Expect(polluted.Reasoning()).To(Equal(raw),
+			"sanity: when the thinking-start token is set and content has no real <think>...</think>, "+
+				"the extractor classifies all content as reasoning — this is exactly the prefill pollution "+
+				"we want chooseDeferredReasoning to guard against")
+		return polluted
+	}
+
+	Context("autoparser was active (preferAutoparser=true)", func() {
+		It("returns the autoparser's reasoning classification, ignoring the polluted Go-side state", func() {
+			const rawToolCall = `{"arguments": {"cmd": "echo hello"}, "name": "exec"}`
+			goSide := newPollutedExtractor(rawToolCall)
+			// The autoparser's view of the same stream: the tool-call JSON
+			// arrived as plain content with an empty reasoning_content
+			// (the scenario where it classified nothing as reasoning).
+			deltas := []*pb.ChatDelta{
+				{Content: rawToolCall, ReasoningContent: ""},
+			}
+
+			result := chooseDeferredReasoning(true, deltas, goSide)
+
+			Expect(result).To(BeEmpty(),
+				"chooseDeferredReasoning must NOT return the polluted extractor state "+
+					"when the autoparser was active — the autoparser correctly classified zero reasoning")
+		})
+
+		It("returns the autoparser's reasoning when it actually did classify reasoning", func() {
+			// Converse case: the chat deltas carry real
+			// reasoning_content, which must come back verbatim even though
+			// the Go-side extractor holds unrelated polluted state.
+			const classified = "Okay, the user asked X. I should call exec."
+			goSide := newPollutedExtractor("ignored polluted state")
+			deltas := []*pb.ChatDelta{
+				{Content: "", ReasoningContent: classified},
+			}
+
+			result := chooseDeferredReasoning(true, deltas, goSide)
+
+			Expect(result).To(Equal(classified))
+		})
+	})
+
+	Context("autoparser was NOT active (preferAutoparser=false)", func() {
+		It("falls back to the Go-side extractor — the right source for vLLM and other autoparser-less backends", func() {
+			const thoughts = "Genuine reasoning from a backend without an autoparser"
+			goSide := reason.NewReasoningExtractor("<think>", reason.Config{})
+			goSide.ProcessToken("<think>" + thoughts + "</think>final answer")
+
+			result := chooseDeferredReasoning(false, nil, goSide)
+
+			Expect(result).To(Equal(thoughts))
+		})
+
+		It("falls back even when ChatDeltas are present but the autoparser never classified anything", func() {
+			// Defensive case: chat deltas are supplied, but with
+			// preferAutoparser=false the extractor must still be the source.
+			goSide := reason.NewReasoningExtractor("", reason.Config{})
+			goSide.ProcessToken("<think>some thoughts</think>answer")
+			leftover := []*pb.ChatDelta{{Content: "answer"}}
+
+			result := chooseDeferredReasoning(false, leftover, goSide)
+
+			Expect(result).To(Equal("some thoughts"))
+		})
+	})
+})
diff --git a/core/http/endpoints/openai/chat_stream_workers.go b/core/http/endpoints/openai/chat_stream_workers.go
index c871ae59a..87f5ee21e 100644
--- a/core/http/endpoints/openai/chat_stream_workers.go
+++ b/core/http/endpoints/openai/chat_stream_workers.go
@@ -7,6 +7,7 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	"github.com/mudler/LocalAI/pkg/model"
 	reason "github.com/mudler/LocalAI/pkg/reasoning"
 	"github.com/mudler/xlog"
@@ -83,6 +84,34 @@ func emitJSONToolCallDeltas(
 	return lastEmittedCount
 }
 
+// chooseDeferredReasoning selects the reasoning text used by the
+// end-of-stream reasoning flush in processStreamWithTools.
+//
+// If preferAutoparser is set (the C++ autoparser was active during the
+// stream), the result is functions.ReasoningFromChatDeltas(chatDeltas),
+// i.e. the autoparser's own reasoning_content — usually empty when it is
+// in pure-content fallback mode. Otherwise the result is the Go-side
+// extractor's accumulated Reasoning(), which is the right source for
+// backends that have no autoparser (vLLM, etc.).
+//
+// Rationale: extractor.Reasoning() can be polluted by
+// PrependThinkingTokenIfNeeded. qwen3's jinja template contains <think>
+// inside an {% if enable_thinking %} block, and DetectThinkingStartToken
+// does not evaluate jinja conditionals, so prefill detection treats every
+// chunk as reasoning — even a raw tool-call JSON emitted in non-thinking
+// mode. Without this gate, qwen3-4b with streaming + tools (after #9985
+// switched the gallery to use_tokenizer_template) sends a trailing SSE
+// chunk whose `reasoning` field carries the tool-call JSON.
+func chooseDeferredReasoning(preferAutoparser bool, chatDeltas []*pb.ChatDelta, extractor *reason.ReasoningExtractor) string {
+	if !preferAutoparser {
+		// No autoparser involvement: the Go-side extractor is the source.
+		return extractor.Reasoning()
+	}
+	// Autoparser was active: use its classification from the chat deltas
+	// rather than the extractor's possibly polluted accumulated state.
+	return functions.ReasoningFromChatDeltas(chatDeltas)
+}
+
 // processStream is the streaming worker for chat completions with no
 // tool/function calling involved. It pushes SSE-shaped chunks onto
 // `responses` and returns the authoritative cumulative TokenUsage from
@@ -228,6 +257,17 @@ func processStreamWithTools(
 	hasChatDeltaToolCalls := false
 	hasChatDeltaContent := false
 
+	// preferAutoparser is sticky: once the C++ autoparser has ever delivered
+	// content or reasoning via ChatDeltas, we trust its classification for the
+	// rest of the stream — including for the end-of-stream reasoning flush in
+	// buildDeferredToolCallChunks. Otherwise the Go-side extractor's
+	// accumulated Reasoning() can be polluted by prefill detection
+	// misclassifying content as reasoning (this happens when <think> appears
+	// in the tokenizer template and the model emits non-reasoning content
+	// like a raw tool-call JSON — qwen3-4b after #9985 enabled
+	// use_tokenizer_template). Mirrors the analogous flag in processStream.
+	preferAutoparser := false
+
 	// X-LocalAI-Node attribution is handled by middleware.ExposeNodeHeader
 	// at the wrapper layer; no in-band signalling from this worker.
 
@@ -251,12 +291,17 @@ func processStreamWithTools(
 
 		if usage.HasChatDeltaContent() {
 			rawReasoning, cd := usage.ChatDeltaReasoningAndContent()
+			preferAutoparser = true
 			contentDelta = cd
 			reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
-		} else {
+		} else if !preferAutoparser {
 			reasoningDelta = goReasoning
 			contentDelta = goContent
 		}
+		// If preferAutoparser is already true but this chunk carried no
+		// autoparser data, leave both deltas empty — the next autoparser
+		// chunk will pick things up. Falling back to Go-side here would
+		// re-introduce the prefill-misclassification leak.
 
 		// Emit reasoning deltas in their own SSE chunks before any tool-call chunks
 		// (OpenAI spec: reasoning and tool_calls never share a delta)
@@ -399,7 +444,14 @@ func processStreamWithTools(
 	} else {
 		// Fallback: parse tool calls from raw text (no chat deltas from backend)
 		xlog.Debug("[ChatDeltas] no pre-parsed tool calls, falling back to Go-side text parsing")
-		reasoning = extractor.Reasoning()
+		// When the autoparser was active during streaming (preferAutoparser),
+		// trust its reasoning classification rather than the Go-side
+		// extractor's accumulated state — the latter may have misclassified
+		// content as reasoning due to prefill detection on a tokenizer
+		// template that contains <think>. This was visible on qwen3-4b after
+		// #9985 enabled use_tokenizer_template: a streaming tool-call JSON
+		// would leak as a trailing reasoning chunk via the deferred flush.
+		reasoning = chooseDeferredReasoning(preferAutoparser, chatDeltas, extractor)
 		cleanedResult := extractor.CleanedContent()
 		*textContentToReturn = functions.ParseTextContent(cleanedResult, cfg.FunctionsConfig)
 		cleanedResult = functions.CleanupLLMResult(cleanedResult, cfg.FunctionsConfig)