From 685e4632d7ef96c7419e7d818380e781da8d97a5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 4 Jun 2026 16:25:53 +0000 Subject: [PATCH] feat(realtime): pipeline disable_thinking maps to enable_thinking off applyPipelineThinking forces the LLM's ReasoningConfig.DisableReasoning when pipeline.disable_thinking is set, which gRPCPredictOpts turns into the enable_thinking=false backend metadata. Applied at newModel construction on the per-session LLM config copy, so it doesn't leak to other model users and needs no realtime-specific request plumbing. Assisted-by: Claude:claude-opus-4-8 go vet Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/realtime_model.go | 4 ++- .../endpoints/openai/realtime_thinking.go | 17 ++++++++++++ .../openai/realtime_thinking_test.go | 26 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 core/http/endpoints/openai/realtime_thinking.go create mode 100644 core/http/endpoints/openai/realtime_thinking_test.go diff --git a/core/http/endpoints/openai/realtime_model.go b/core/http/endpoints/openai/realtime_model.go index b18439340..8281197a3 100644 --- a/core/http/endpoints/openai/realtime_model.go +++ b/core/http/endpoints/openai/realtime_model.go @@ -528,8 +528,10 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model return nil, fmt.Errorf("failed to validate config: %w", err) } - // Let the pipeline set the LLM's reasoning effort (cfgLLM is a per-session copy). + // Let the pipeline set the LLM's reasoning effort and force thinking off + // (cfgLLM is a per-session copy). disable_thinking applies after the effort. 
applyPipelineReasoning(cfgLLM, *pipeline) + applyPipelineThinking(cfgLLM, *pipeline) cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath) if err != nil { diff --git a/core/http/endpoints/openai/realtime_thinking.go b/core/http/endpoints/openai/realtime_thinking.go new file mode 100644 index 000000000..41addf963 --- /dev/null +++ b/core/http/endpoints/openai/realtime_thinking.go @@ -0,0 +1,17 @@ +package openai + +import "github.com/mudler/LocalAI/core/config" + +// applyPipelineThinking forces the LLM's reasoning/thinking off when the realtime +// pipeline sets disable_thinking, mapping to the enable_thinking=false backend +// metadata via ReasoningConfig.DisableReasoning. The LLM config passed in is the +// per-session copy returned by the config loader, so this does not affect other +// users of the same model. When the pipeline does not set disable_thinking the +// LLM config is left untouched. +func applyPipelineThinking(llm *config.ModelConfig, pipeline config.Pipeline) { + if llm == nil || !pipeline.ThinkingDisabled() { + return + } + disable := true + llm.ReasoningConfig.DisableReasoning = &disable +} diff --git a/core/http/endpoints/openai/realtime_thinking_test.go b/core/http/endpoints/openai/realtime_thinking_test.go new file mode 100644 index 000000000..6a38fa86d --- /dev/null +++ b/core/http/endpoints/openai/realtime_thinking_test.go @@ -0,0 +1,26 @@ +package openai + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/config" +) + +// applyPipelineThinking lets a realtime pipeline force the LLM's thinking off +// (enable_thinking=false metadata) without editing the LLM model config. 
+var _ = Describe("applyPipelineThinking", func() { // specs for the pipeline disable_thinking override
+	It("disables reasoning on the LLM config when the pipeline disables thinking", func() {
+		disable := true // addressable local so the pipeline field can point at it
+		llm := &config.ModelConfig{}
+		applyPipelineThinking(llm, config.Pipeline{DisableThinking: &disable})
+		Expect(llm.ReasoningConfig.DisableReasoning).ToNot(BeNil()) // nil check guards the dereference below
+		Expect(*llm.ReasoningConfig.DisableReasoning).To(BeTrue())
+	})
+
+	It("leaves the LLM config untouched when the pipeline does not set disable_thinking", func() {
+		llm := &config.ModelConfig{}
+		applyPipelineThinking(llm, config.Pipeline{}) // zero-value pipeline: DisableThinking is not set
+		Expect(llm.ReasoningConfig.DisableReasoning).To(BeNil())
+	})
+})