feat(realtime): pipeline disable_thinking maps to enable_thinking off

applyPipelineThinking forces the LLM's ReasoningConfig.DisableReasoning to true when pipeline.disable_thinking is set, which gRPCPredictOpts turns into the enable_thinking=false backend metadata. It is applied at newModel construction on the per-session LLM config copy, so it doesn't leak to other model users and needs no realtime-specific request plumbing. Assisted-by: Claude:claude-opus-4-8 go vet Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-06 07:46:15 -04:00 · 2026-06-04 16:25:53 +00:00
parent 98ed541b22
commit 685e4632d7
3 changed files with 46 additions and 1 deletions
--- a/core/http/endpoints/openai/realtime_model.go
+++ b/core/http/endpoints/openai/realtime_model.go
@@ -528,8 +528,10 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
 		return nil, fmt.Errorf("failed to validate config: %w", err)
 	}

-	// Let the pipeline set the LLM's reasoning effort (cfgLLM is a per-session copy).
+	// Let the pipeline set the LLM's reasoning effort and force thinking off
+	// (cfgLLM is a per-session copy). disable_thinking applies after the effort.
 	applyPipelineReasoning(cfgLLM, *pipeline)
+	applyPipelineThinking(cfgLLM, *pipeline)

 	cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
 	if err != nil {
--- a/core/http/endpoints/openai/realtime_thinking.go
+++ b/core/http/endpoints/openai/realtime_thinking.go
@@ -0,0 +1,17 @@
+package openai
+
+import "github.com/mudler/LocalAI/core/config"
+
+// applyPipelineThinking forces the LLM's reasoning/thinking off when the realtime
+// pipeline sets disable_thinking, mapping to the enable_thinking=false backend
+// metadata via ReasoningConfig.DisableReasoning. The LLM config passed in is the
+// per-session copy returned by the config loader, so this does not affect other
+// users of the same model. When llm is nil or the pipeline does not set
+// disable_thinking, nothing is modified.
+func applyPipelineThinking(llm *config.ModelConfig, pipeline config.Pipeline) {
+	if llm == nil || !pipeline.ThinkingDisabled() {
+		return
+	}
+	disable := true // fresh local so the config gets its own bool to point at
+	llm.ReasoningConfig.DisableReasoning = &disable
+}
--- a/core/http/endpoints/openai/realtime_thinking_test.go
+++ b/core/http/endpoints/openai/realtime_thinking_test.go
@@ -0,0 +1,26 @@
+package openai
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"github.com/mudler/LocalAI/core/config"
+)
+
+// applyPipelineThinking lets a realtime pipeline force the LLM's thinking off
+// (enable_thinking=false metadata) without editing the LLM model config.
+var _ = Describe("applyPipelineThinking", func() {
+	It("disables reasoning on the LLM config when the pipeline disables thinking", func() {
+		disable := true
+		llm := &config.ModelConfig{} // zero value: DisableReasoning starts out nil
+		applyPipelineThinking(llm, config.Pipeline{DisableThinking: &disable})
+		Expect(llm.ReasoningConfig.DisableReasoning).ToNot(BeNil()) // guard before the dereference below
+		Expect(*llm.ReasoningConfig.DisableReasoning).To(BeTrue())
+	})

+	It("leaves the LLM config untouched when the pipeline does not set disable_thinking", func() {
+		llm := &config.ModelConfig{}
+		applyPipelineThinking(llm, config.Pipeline{}) // zero-value pipeline: DisableThinking is a nil pointer
+		Expect(llm.ReasoningConfig.DisableReasoning).To(BeNil())
+	})
+})