feat(config): add chat_template_kwargs model field + resolver

Adds the ChatTemplateKwargs model-config map and RequestMetadata carrier,
plus ResolveChatTemplateKwargs, which layers the config map under coerced
request metadata. Foundation for generic jinja chat-template kwargs (issue #10329).

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-06-16 07:46:43 +00:00
parent 8bd2df8f68
commit bc8a1be801
2 changed files with 99 additions and 0 deletions

View File

@@ -0,0 +1,48 @@
package config_test
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/mudler/LocalAI/core/config"
)
// Specs for ModelConfig.ResolveChatTemplateKwargs: the model config map is the
// base layer, and the backend metadata (server reasoning levers + client request
// overrides) is coerced and applied on top of it.
var _ = Describe("ModelConfig.ResolveChatTemplateKwargs", func() {
	It("returns nil when nothing is set", func() {
		var cfg config.ModelConfig

		Expect(cfg.ResolveChatTemplateKwargs(nil)).To(BeNil())
	})

	It("returns the config map when no metadata is present", func() {
		cfg := &config.ModelConfig{
			ChatTemplateKwargs: map[string]any{"preserve_thinking": true},
		}

		resolved := cfg.ResolveChatTemplateKwargs(nil)

		Expect(resolved).To(HaveKeyWithValue("preserve_thinking", true))
	})

	It("lets metadata override the config map", func() {
		cfg := &config.ModelConfig{
			ChatTemplateKwargs: map[string]any{"enable_thinking": true},
		}
		// The metadata carries the opposite value, as a string.
		metadata := map[string]string{"enable_thinking": "false"}

		resolved := cfg.ResolveChatTemplateKwargs(metadata)

		Expect(resolved).To(HaveKeyWithValue("enable_thinking", false))
	})

	It("coerces true/false to bool and leaves other strings as-is", func() {
		cfg := &config.ModelConfig{}
		metadata := map[string]string{
			"enable_thinking":  "true",
			"reasoning_effort": "high",
		}

		resolved := cfg.ResolveChatTemplateKwargs(metadata)

		Expect(resolved).To(HaveKeyWithValue("enable_thinking", true))
		Expect(resolved).To(HaveKeyWithValue("reasoning_effort", "high"))
	})

	It("skips the reserved chat_template_kwargs metadata key but keeps siblings", func() {
		cfg := &config.ModelConfig{}
		metadata := map[string]string{
			"chat_template_kwargs": "{\"x\":1}",
			"preserve_thinking":    "true",
		}

		resolved := cfg.ResolveChatTemplateKwargs(metadata)

		Expect(resolved).ToNot(HaveKey("chat_template_kwargs"))
		Expect(resolved).To(HaveKeyWithValue("preserve_thinking", true))
	})
})

View File

@@ -70,6 +70,19 @@ type ModelConfig struct {
// (Harmony) or LFM2.5 — honor it; "none" also toggles enable_thinking off.
ReasoningEffort string `yaml:"reasoning_effort,omitempty" json:"reasoning_effort,omitempty"`
// ChatTemplateKwargs are arbitrary key/values forwarded to the backend's jinja
// chat template via chat_template_kwargs (e.g. preserve_thinking: true). The
// server-derived reasoning levers (enable_thinking / reasoning_effort) and any
// per-request metadata overrides layer on top. See gRPCPredictOpts.
ChatTemplateKwargs map[string]any `yaml:"chat_template_kwargs,omitempty" json:"chat_template_kwargs,omitempty"`
// RequestMetadata holds the raw client request `metadata` map for the current
// request. The request middleware stamps it; gRPCPredictOpts merges it into the
// backend gRPC metadata (overriding the server-derived enable_thinking /
// reasoning_effort) and folds it, coerced, into the chat_template_kwargs blob.
// Never persisted to YAML.
RequestMetadata map[string]string `yaml:"-" json:"-"`
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig `yaml:",inline" json:",inline"`
@@ -551,6 +564,44 @@ func (c *ModelConfig) ApplyReasoningEffort(requestEffort string) {
}
}
// coerceChatTemplateKwarg converts a request-metadata string into the value used
// as a jinja chat_template_kwarg.
//
// Only the exact strings "true" and "false" are converted, to the matching Go
// bool. This matters because a jinja test such as `{% if preserve_thinking %}`
// treats any non-empty string - "false" included - as truthy. Every other input
// is returned unchanged as a string.
//
// Numeric or otherwise typed per-request values are out of scope; set those in
// the model YAML chat_template_kwargs, where YAML keeps the type.
func coerceChatTemplateKwarg(v string) any {
	if v == "true" || v == "false" {
		// The comparison yields true for "true" and false for "false".
		return v == "true"
	}
	return v
}
// ResolveChatTemplateKwargs assembles the chat_template_kwargs map that is
// forwarded to the backend. Two layers are merged into a fresh map, later layers
// winning on key collisions:
//
//  1. c.ChatTemplateKwargs, the model config map (values copied as-is);
//  2. meta, the already-merged backend metadata string map (server reasoning
//     levers + client request overrides), each value passed through
//     coerceChatTemplateKwarg.
//
// The reserved "chat_template_kwargs" key in meta is never copied, so a client
// cannot smuggle a nested blob. The result is nil when there is nothing to
// forward.
func (c *ModelConfig) ResolveChatTemplateKwargs(meta map[string]string) map[string]any {
	const reservedKey = "chat_template_kwargs"

	// Capacity is only a hint; the upper bound is every key from both layers.
	merged := make(map[string]any, len(c.ChatTemplateKwargs)+len(meta))

	// Base layer: the model config map.
	for name, value := range c.ChatTemplateKwargs {
		merged[name] = value
	}

	// Top layer: coerced metadata, overriding config entries with the same key.
	for name, raw := range meta {
		if name != reservedKey {
			merged[name] = coerceChatTemplateKwarg(raw)
		}
	}

	if len(merged) > 0 {
		return merged
	}
	return nil
}
// @Description PipelineStreaming toggles incremental delivery per realtime stage.
type PipelineStreaming struct {
LLM *bool `yaml:"llm,omitempty" json:"llm,omitempty"`