mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-05 23:36:49 -04:00
feat: forward reasoning_effort to the backend so jinja models honor it (#10184)
* feat: forward reasoning_effort to the backend so jinja models honor it reasoning_effort was only mapped to the binary enable_thinking toggle and otherwise reached Go-side templates — it was never sent to the backend. So jinja-templated models whose chat template keys on reasoning_effort (gpt-oss Harmony, LFM2.5) could not be driven by it: LFM2.5 ignores enable_thinking and kept emitting <think>. Forward the effective reasoning_effort to the backend as a chat_template_kwarg (mirroring enable_thinking) in grpc-server.cpp, and put it in PredictOptions metadata (gRPCPredictOpts). Add a config-level default: ModelConfig.reasoning_effort and Pipeline.reasoning_effort, resolved by ModelConfig.ApplyReasoningEffort (request value overrides config default, none->disable / level->enable, an operator's reasoning.disable wins). request.go now uses that helper. Assisted-by: Claude:claude-opus-4-8 go test, golangci-lint Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(realtime): set the pipeline LLM's reasoning_effort Apply Pipeline.ReasoningEffort to the pipeline's LLM config when the realtime model is built (per-session copy, overrides the LLM's own reasoning_effort), and surface the resolved effort on the template input so Go-templated models get it too. jinja models receive it via the backend metadata. This lets a realtime pipeline disable thinking on models that only honor reasoning_effort (e.g. LFM2.5), which enable_thinking can't. Assisted-by: Claude:claude-opus-4-8 go test, golangci-lint Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -1944,6 +1944,17 @@ public:
|
||||
body_json["chat_template_kwargs"]["enable_thinking"] = (et_it->second == "true");
|
||||
}
|
||||
|
||||
// Pass reasoning_effort via chat_template_kwargs too: the lever
|
||||
// jinja templates like gpt-oss (Harmony) / LFM2.5 read, distinct
|
||||
// from enable_thinking which those templates ignore.
|
||||
auto re_it = metadata.find("reasoning_effort");
|
||||
if (re_it != metadata.end() && !re_it->second.empty()) {
|
||||
if (!body_json.contains("chat_template_kwargs")) {
|
||||
body_json["chat_template_kwargs"] = json::object();
|
||||
}
|
||||
body_json["chat_template_kwargs"]["reasoning_effort"] = re_it->second;
|
||||
}
|
||||
|
||||
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
|
||||
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
|
||||
|
||||
@@ -2737,6 +2748,17 @@ public:
|
||||
body_json["chat_template_kwargs"]["enable_thinking"] = (predict_et_it->second == "true");
|
||||
}
|
||||
|
||||
// Pass reasoning_effort via chat_template_kwargs too: the lever
|
||||
// jinja templates like gpt-oss (Harmony) / LFM2.5 read, distinct
|
||||
// from enable_thinking which those templates ignore.
|
||||
auto predict_re_it = predict_metadata.find("reasoning_effort");
|
||||
if (predict_re_it != predict_metadata.end() && !predict_re_it->second.empty()) {
|
||||
if (!body_json.contains("chat_template_kwargs")) {
|
||||
body_json["chat_template_kwargs"] = json::object();
|
||||
}
|
||||
body_json["chat_template_kwargs"]["reasoning_effort"] = predict_re_it->second;
|
||||
}
|
||||
|
||||
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
|
||||
SRV_DBG("[CONVERSATION DEBUG] Predict: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
|
||||
|
||||
|
||||
@@ -239,13 +239,13 @@ func grpcModelOpts(c config.ModelConfig, modelPath string) *pb.ModelOptions {
|
||||
|
||||
if c.Backend == "cloud-proxy" {
|
||||
opts.Proxy = &pb.ProxyOptions{
|
||||
UpstreamUrl: c.Proxy.UpstreamURL,
|
||||
Mode: c.Proxy.Mode,
|
||||
Provider: c.Proxy.Provider,
|
||||
ApiKeyEnv: c.Proxy.APIKeyEnv,
|
||||
ApiKeyFile: c.Proxy.APIKeyFile,
|
||||
UpstreamModel: c.Proxy.UpstreamModel,
|
||||
RequestTimeoutSeconds: int32(c.Proxy.RequestTimeoutSeconds),
|
||||
UpstreamUrl: c.Proxy.UpstreamURL,
|
||||
Mode: c.Proxy.Mode,
|
||||
Provider: c.Proxy.Provider,
|
||||
ApiKeyEnv: c.Proxy.APIKeyEnv,
|
||||
ApiKeyFile: c.Proxy.APIKeyFile,
|
||||
UpstreamModel: c.Proxy.UpstreamModel,
|
||||
RequestTimeoutSeconds: int32(c.Proxy.RequestTimeoutSeconds),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -323,6 +323,12 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
|
||||
metadata["enable_thinking"] = "true"
|
||||
}
|
||||
}
|
||||
// Forward the effective reasoning effort so the backend can pass it to the
|
||||
// jinja chat template (chat_template_kwargs.reasoning_effort) — the lever
|
||||
// models like gpt-oss / LFM2.5 actually read, distinct from enable_thinking.
|
||||
if c.ReasoningEffort != "" {
|
||||
metadata["reasoning_effort"] = c.ReasoningEffort
|
||||
}
|
||||
pbOpts.Metadata = metadata
|
||||
|
||||
// Logprobs and TopLogprobs are set by the caller if provided
|
||||
|
||||
@@ -75,3 +75,25 @@ var _ = Describe("gRPCPredictOpts enable_thinking metadata", func() {
|
||||
Expect(opts.Metadata).ToNot(HaveKey("enable_thinking"))
|
||||
})
|
||||
})
|
||||
|
||||
// Guards forwarding the effective reasoning_effort into PredictOptions.Metadata,
|
||||
// where the backend passes it to the jinja chat template (chat_template_kwargs)
|
||||
// so models like gpt-oss / LFM2.5 honor it.
|
||||
var _ = Describe("gRPCPredictOpts reasoning_effort metadata", func() {
|
||||
withEffort := func(effort string) config.ModelConfig {
|
||||
cfg := config.ModelConfig{}
|
||||
cfg.SetDefaults()
|
||||
cfg.ReasoningEffort = effort
|
||||
return cfg
|
||||
}
|
||||
|
||||
It("forwards reasoning_effort when set", func() {
|
||||
opts := gRPCPredictOpts(withEffort("none"), "/tmp/models")
|
||||
Expect(opts.Metadata).To(HaveKeyWithValue("reasoning_effort", "none"))
|
||||
})
|
||||
|
||||
It("omits reasoning_effort when empty", func() {
|
||||
opts := gRPCPredictOpts(withEffort(""), "/tmp/models")
|
||||
Expect(opts.Metadata).ToNot(HaveKey("reasoning_effort"))
|
||||
})
|
||||
})
|
||||
|
||||
@@ -128,6 +128,22 @@ func DefaultRegistry() map[string]FieldMetaOverride {
|
||||
Advanced: true,
|
||||
Order: 21,
|
||||
},
|
||||
"reasoning_effort": {
|
||||
Section: "llm",
|
||||
Label: "Reasoning Effort",
|
||||
Description: "Default reasoning effort, forwarded to the backend as the reasoning_effort chat_template_kwarg (jinja models like gpt-oss / LFM2.5 honor it). A per-request reasoning_effort overrides it. 'none' also turns thinking off.",
|
||||
Component: "select",
|
||||
Options: []FieldOption{
|
||||
{Value: "", Label: "Unset (model default)"},
|
||||
{Value: "none", Label: "none (disable thinking)"},
|
||||
{Value: "minimal", Label: "minimal"},
|
||||
{Value: "low", Label: "low"},
|
||||
{Value: "medium", Label: "medium"},
|
||||
{Value: "high", Label: "high"},
|
||||
},
|
||||
Advanced: true,
|
||||
Order: 22,
|
||||
},
|
||||
"cache_type_k": {
|
||||
Section: "llm",
|
||||
Label: "KV Cache Type (K)",
|
||||
@@ -277,6 +293,21 @@ func DefaultRegistry() map[string]FieldMetaOverride {
|
||||
AutocompleteProvider: ProviderModelsVAD,
|
||||
Order: 63,
|
||||
},
|
||||
"pipeline.reasoning_effort": {
|
||||
Section: "pipeline",
|
||||
Label: "Reasoning Effort",
|
||||
Description: "Reasoning effort for the pipeline's LLM, forwarded to the backend as the reasoning_effort chat_template_kwarg (jinja models like gpt-oss / LFM2.5 honor it). Overrides the LLM model's own reasoning_effort. 'none' also turns thinking off.",
|
||||
Component: "select",
|
||||
Options: []FieldOption{
|
||||
{Value: "", Label: "Default (model config)"},
|
||||
{Value: "none", Label: "none (disable thinking)"},
|
||||
{Value: "minimal", Label: "minimal"},
|
||||
{Value: "low", Label: "low"},
|
||||
{Value: "medium", Label: "medium"},
|
||||
{Value: "high", Label: "high"},
|
||||
},
|
||||
Order: 64,
|
||||
},
|
||||
|
||||
// --- Functions ---
|
||||
"function.grammar.parallel_calls": {
|
||||
|
||||
@@ -63,6 +63,13 @@ type ModelConfig struct {
|
||||
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
|
||||
ReasoningConfig reasoning.Config `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
|
||||
|
||||
// ReasoningEffort is the default reasoning effort (none|minimal|low|medium|high)
|
||||
// for this model. A per-request reasoning_effort overrides it. It is forwarded
|
||||
// to the backend as the reasoning_effort chat_template_kwarg (see
|
||||
// gRPCPredictOpts), so jinja-templated models that key on it — e.g. gpt-oss
|
||||
// (Harmony) or LFM2.5 — honor it; "none" also toggles enable_thinking off.
|
||||
ReasoningEffort string `yaml:"reasoning_effort,omitempty" json:"reasoning_effort,omitempty"`
|
||||
|
||||
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
|
||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||
LLMConfig `yaml:",inline" json:",inline"`
|
||||
@@ -487,6 +494,40 @@ type Pipeline struct {
|
||||
LLM string `yaml:"llm,omitempty" json:"llm,omitempty"`
|
||||
Transcription string `yaml:"transcription,omitempty" json:"transcription,omitempty"`
|
||||
VAD string `yaml:"vad,omitempty" json:"vad,omitempty"`
|
||||
|
||||
// ReasoningEffort sets the reasoning effort (none|minimal|low|medium|high) for
|
||||
// the pipeline's LLM without editing the LLM model config. Overrides the LLM's
|
||||
// own reasoning_effort. Unset leaves the LLM model config in charge.
|
||||
ReasoningEffort string `yaml:"reasoning_effort,omitempty" json:"reasoning_effort,omitempty"`
|
||||
}
|
||||
|
||||
// ApplyReasoningEffort resolves the effective reasoning effort — a per-request
|
||||
// value (requestEffort) overrides the config's own ReasoningEffort default —
|
||||
// stores it on the config so gRPCPredictOpts forwards it to the backend as the
|
||||
// reasoning_effort chat_template_kwarg, and maps it onto the enable_thinking
|
||||
// toggle the backend also reads:
|
||||
// - "none" always disables thinking.
|
||||
// - any explicit level enables it, UNLESS the config already disabled reasoning
|
||||
// (an operator's explicit disable wins over a request asking to think).
|
||||
//
|
||||
// An empty requestEffort keeps the config's own default. With no effort set
|
||||
// anywhere it is a no-op, leaving the model's reasoning settings untouched.
|
||||
func (c *ModelConfig) ApplyReasoningEffort(requestEffort string) {
|
||||
effort := requestEffort
|
||||
if effort == "" {
|
||||
effort = c.ReasoningEffort
|
||||
}
|
||||
c.ReasoningEffort = effort
|
||||
switch strings.ToLower(effort) {
|
||||
case "none":
|
||||
disable := true
|
||||
c.ReasoningConfig.DisableReasoning = &disable
|
||||
case "minimal", "low", "medium", "high":
|
||||
if c.ReasoningConfig.DisableReasoning == nil || !*c.ReasoningConfig.DisableReasoning {
|
||||
enable := false
|
||||
c.ReasoningConfig.DisableReasoning = &enable
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// @Description File configuration for model downloads
|
||||
|
||||
52
core/config/reasoning_effort_test.go
Normal file
52
core/config/reasoning_effort_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package config_test
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// ApplyReasoningEffort resolves the effective reasoning effort (request value
|
||||
// overrides the model config default), stores it on the config so it reaches the
|
||||
// backend, and maps it onto the enable_thinking toggle.
|
||||
var _ = Describe("ModelConfig.ApplyReasoningEffort", func() {
|
||||
It("uses the request value over the config default", func() {
|
||||
c := &config.ModelConfig{ReasoningEffort: "high"}
|
||||
c.ApplyReasoningEffort("none")
|
||||
Expect(c.ReasoningEffort).To(Equal("none"))
|
||||
Expect(c.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*c.ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
})
|
||||
|
||||
It("falls back to the config default when the request omits it", func() {
|
||||
c := &config.ModelConfig{ReasoningEffort: "none"}
|
||||
c.ApplyReasoningEffort("")
|
||||
Expect(c.ReasoningEffort).To(Equal("none"))
|
||||
Expect(c.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*c.ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
})
|
||||
|
||||
It("enables thinking for an explicit effort level", func() {
|
||||
c := &config.ModelConfig{}
|
||||
c.ApplyReasoningEffort("medium")
|
||||
Expect(c.ReasoningEffort).To(Equal("medium"))
|
||||
Expect(c.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*c.ReasoningConfig.DisableReasoning).To(BeFalse())
|
||||
})
|
||||
|
||||
It("does not let a level override an operator's config-level disable", func() {
|
||||
disabled := true
|
||||
c := &config.ModelConfig{}
|
||||
c.ReasoningConfig.DisableReasoning = &disabled
|
||||
c.ApplyReasoningEffort("high")
|
||||
Expect(*c.ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
})
|
||||
|
||||
It("is a no-op on the toggle when no effort is set anywhere", func() {
|
||||
c := &config.ModelConfig{}
|
||||
c.ApplyReasoningEffort("")
|
||||
Expect(c.ReasoningEffort).To(Equal(""))
|
||||
Expect(c.ReasoningConfig.DisableReasoning).To(BeNil())
|
||||
})
|
||||
})
|
||||
@@ -44,10 +44,10 @@ type wrappedModel struct {
|
||||
// deps in. nil-safe: with classifierRegistry == nil the per-turn
|
||||
// routing block in Predict is skipped, preserving today's "one LLM
|
||||
// for the whole session" behaviour.
|
||||
routerDeps *middleware.ClassifierDeps
|
||||
routerStore router.DecisionStore
|
||||
routerSessionID string
|
||||
routerUserID string
|
||||
routerDeps *middleware.ClassifierDeps
|
||||
routerStore router.DecisionStore
|
||||
routerSessionID string
|
||||
routerUserID string
|
||||
}
|
||||
|
||||
// anyToAnyModel represent a model which supports Any-to-Any operations
|
||||
@@ -119,6 +119,11 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im
|
||||
}
|
||||
}
|
||||
|
||||
// Surface the resolved reasoning effort to the Go-side template path too
|
||||
// (jinja models get it via backend metadata in gRPCPredictOpts; Go-templated
|
||||
// models like gpt-oss read it from the template's .ReasoningEffort).
|
||||
input.ReasoningEffort = turnCfg.ReasoningEffort
|
||||
|
||||
var predInput string
|
||||
var funcs []functions.Function
|
||||
if !turnCfg.TemplateConfig.UseTokenizerTemplate {
|
||||
@@ -449,6 +454,9 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
return nil, fmt.Errorf("failed to validate config: %w", err)
|
||||
}
|
||||
|
||||
// Let the pipeline set the LLM's reasoning effort (cfgLLM is a per-session copy).
|
||||
applyPipelineReasoning(cfgLLM, *pipeline)
|
||||
|
||||
cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
|
||||
16
core/http/endpoints/openai/realtime_reasoning.go
Normal file
16
core/http/endpoints/openai/realtime_reasoning.go
Normal file
@@ -0,0 +1,16 @@
|
||||
package openai
|
||||
|
||||
import "github.com/mudler/LocalAI/core/config"
|
||||
|
||||
// applyPipelineReasoning sets the reasoning effort for a realtime pipeline's LLM
|
||||
// from the pipeline config, without editing the underlying LLM model config. The
|
||||
// pipeline value overrides the LLM's own reasoning_effort; when the pipeline does
|
||||
// not set it, the LLM model config's reasoning_effort (if any) is used. The LLM
|
||||
// config passed in is the per-session copy returned by the config loader, so this
|
||||
// does not affect other users of the same model.
|
||||
func applyPipelineReasoning(llm *config.ModelConfig, pipeline config.Pipeline) {
|
||||
if llm == nil {
|
||||
return
|
||||
}
|
||||
llm.ApplyReasoningEffort(pipeline.ReasoningEffort)
|
||||
}
|
||||
33
core/http/endpoints/openai/realtime_reasoning_test.go
Normal file
33
core/http/endpoints/openai/realtime_reasoning_test.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// applyPipelineReasoning lets a realtime pipeline set the reasoning effort for
|
||||
// its LLM (forwarded to the backend as reasoning_effort) without editing the LLM
|
||||
// model config. The pipeline value overrides the LLM's own reasoning_effort.
|
||||
var _ = Describe("applyPipelineReasoning", func() {
|
||||
It("applies the pipeline reasoning_effort to the LLM config", func() {
|
||||
llm := &config.ModelConfig{}
|
||||
applyPipelineReasoning(llm, config.Pipeline{ReasoningEffort: "none"})
|
||||
Expect(llm.ReasoningEffort).To(Equal("none"))
|
||||
Expect(llm.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*llm.ReasoningConfig.DisableReasoning).To(BeTrue())
|
||||
})
|
||||
|
||||
It("falls back to the LLM's own reasoning_effort when the pipeline is unset", func() {
|
||||
llm := &config.ModelConfig{ReasoningEffort: "high"}
|
||||
applyPipelineReasoning(llm, config.Pipeline{})
|
||||
Expect(llm.ReasoningEffort).To(Equal("high"))
|
||||
Expect(llm.ReasoningConfig.DisableReasoning).ToNot(BeNil())
|
||||
Expect(*llm.ReasoningConfig.DisableReasoning).To(BeFalse())
|
||||
})
|
||||
|
||||
It("is nil-safe", func() {
|
||||
applyPipelineReasoning(nil, config.Pipeline{ReasoningEffort: "low"})
|
||||
})
|
||||
})
|
||||
@@ -310,25 +310,13 @@ func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
// Map the per-request reasoning_effort onto the reasoning toggle the
|
||||
// backend reads (enable_thinking metadata, set in gRPCPredictOpts).
|
||||
// "none" disables thinking for this request - the use case from #10072,
|
||||
// running a single Qwen3-style model and turning reasoning off per
|
||||
// request. Any explicit effort level enables thinking, UNLESS the model
|
||||
// config explicitly disabled it (DisableReasoning==true wins): an
|
||||
// operator who deliberately turned reasoning off should not be overridden
|
||||
// by a request. A value of "none" always disables, since that never
|
||||
// conflicts with a config that also disables.
|
||||
switch strings.ToLower(input.ReasoningEffort) {
|
||||
case "none":
|
||||
disable := true
|
||||
config.ReasoningConfig.DisableReasoning = &disable
|
||||
case "minimal", "low", "medium", "high":
|
||||
if config.ReasoningConfig.DisableReasoning == nil || !*config.ReasoningConfig.DisableReasoning {
|
||||
enable := false
|
||||
config.ReasoningConfig.DisableReasoning = &enable
|
||||
}
|
||||
}
|
||||
// Resolve the effective reasoning effort (request overrides the model config
|
||||
// default), store it so gRPCPredictOpts forwards it to the backend as the
|
||||
// reasoning_effort chat_template_kwarg (what gpt-oss / LFM2.5 read), and map
|
||||
// it onto the enable_thinking toggle. "none" disables thinking (the #10072
|
||||
// use case); a level enables it unless the config already disabled reasoning
|
||||
// (an operator's explicit disable wins over a request asking to think).
|
||||
config.ApplyReasoningEffort(input.ReasoningEffort)
|
||||
|
||||
// Collapse the modern max_completion_tokens alias into the
|
||||
// legacy Maxtokens field so downstream code reads exactly one.
|
||||
|
||||
@@ -418,6 +418,26 @@ This is the load-time reasoning configuration. The orthogonal per-request `enabl
|
||||
- `reasoning_effort: "minimal" | "low" | "medium" | "high"` enables thinking, unless the model config explicitly set `reasoning.disable: true` (an operator's explicit disable wins and is never re-enabled by a request).
|
||||
{{% /notice %}}
|
||||
|
||||
#### `reasoning_effort` as a chat-template kwarg
|
||||
|
||||
`reasoning_effort` is also forwarded to the backend as a `chat_template_kwarg`, so models whose **jinja chat template** keys on it — e.g. gpt-oss (Harmony) or LFM2.5 — honor the **level**, not just the on/off `enable_thinking` flag. This matters for models that ignore `enable_thinking` entirely (LFM2.5 keeps emitting `<think>` for `enable_thinking=false`, but respects `reasoning_effort`).
|
||||
|
||||
Set a per-model default in the config so every request inherits it (a per-request `reasoning_effort` still overrides):
|
||||
|
||||
```yaml
|
||||
name: my-model
|
||||
reasoning_effort: none # none | minimal | low | medium | high
|
||||
```
|
||||
|
||||
For [realtime pipelines]({{%relref "docs/features/openai-realtime" %}}), set it on the pipeline so it applies to the pipeline's LLM without editing that model's own config:
|
||||
|
||||
```yaml
|
||||
name: gpt-realtime
|
||||
pipeline:
|
||||
llm: lfm2.5
|
||||
reasoning_effort: none # overrides the LLM model's own reasoning_effort
|
||||
```
|
||||
|
||||
### Multimodal Backend Options
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
|
||||
Reference in New Issue
Block a user