mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-06 07:46:15 -04:00
feat(realtime): pipeline disable_thinking maps to enable_thinking off
applyPipelineThinking forces the LLM's ReasoningConfig.DisableReasoning when pipeline.disable_thinking is set, which gRPCPredictOpts turns into the enable_thinking=false backend metadata. Applied at newModel construction on the per-session LLM config copy, so it doesn't leak to other model users and needs no realtime-specific request plumbing.

Assisted-by: Claude:claude-opus-4-8 go vet
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -528,8 +528,10 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
return nil, fmt.Errorf("failed to validate config: %w", err)
|
||||
}
|
||||
|
||||
// Let the pipeline set the LLM's reasoning effort (cfgLLM is a per-session copy).
|
||||
// Let the pipeline set the LLM's reasoning effort and force thinking off
|
||||
// (cfgLLM is a per-session copy). disable_thinking applies after the effort.
|
||||
applyPipelineReasoning(cfgLLM, *pipeline)
|
||||
applyPipelineThinking(cfgLLM, *pipeline)
|
||||
|
||||
cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
17
core/http/endpoints/openai/realtime_thinking.go
Normal file
17
core/http/endpoints/openai/realtime_thinking.go
Normal file
@@ -0,0 +1,17 @@
|
||||
package openai
|
||||
|
||||
import "github.com/mudler/LocalAI/core/config"
|
||||
|
||||
// applyPipelineThinking forces the LLM's reasoning/thinking off when the realtime
|
||||
// pipeline sets disable_thinking, mapping to the enable_thinking=false backend
|
||||
// metadata via ReasoningConfig.DisableReasoning. The LLM config passed in is the
|
||||
// per-session copy returned by the config loader, so this does not affect other
|
||||
// users of the same model. When the pipeline does not set disable_thinking the
|
||||
// LLM config is left untouched.
|
||||
func applyPipelineThinking(llm *config.ModelConfig, pipeline config.Pipeline) {
|
||||
if llm == nil || !pipeline.ThinkingDisabled() {
|
||||
return
|
||||
}
|
||||
disable := true
|
||||
llm.ReasoningConfig.DisableReasoning = &disable
|
||||
}
|
||||
26
core/http/endpoints/openai/realtime_thinking_test.go
Normal file
26
core/http/endpoints/openai/realtime_thinking_test.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// applyPipelineThinking lets a realtime pipeline force the LLM's thinking off
|
||||
// (enable_thinking=false metadata) without editing the LLM model config.
|
||||
var _ = Describe("applyPipelineThinking", func() {
|
||||
It("disables reasoning on the LLM config when the pipeline disables thinking", func() {
|
||||
disable := true
|
||||
llm := &config.ModelConfig{}
|
||||
applyPipelineThinking(llm, config.Pipeline{DisableThinking: &disable})
|
||||
Expect(llm.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*llm.ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
})
|
||||
|
||||
It("leaves the LLM config untouched when the pipeline does not set disable_thinking", func() {
|
||||
llm := &config.ModelConfig{}
|
||||
applyPipelineThinking(llm, config.Pipeline{})
|
||||
Expect(llm.ReasoningConfig.DisableReasoning).To(BeNil())
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user