From bc8a1be8014e59d9126ae1b4139c41fbf2a1c06a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 16 Jun 2026 07:46:43 +0000 Subject: [PATCH] feat(config): add chat_template_kwargs model field + resolver Adds the ChatTemplateKwargs model-config map and RequestMetadata carrier, plus ResolveChatTemplateKwargs which layers the config map under coerced request metadata. Foundation for generic jinja chat-template kwargs (issue #10329). Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Ettore Di Giacinto --- core/config/chat_template_kwargs_test.go | 48 ++++++++++++++++++++++ core/config/model_config.go | 51 ++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 core/config/chat_template_kwargs_test.go diff --git a/core/config/chat_template_kwargs_test.go b/core/config/chat_template_kwargs_test.go new file mode 100644 index 000000000..4b32e695e --- /dev/null +++ b/core/config/chat_template_kwargs_test.go @@ -0,0 +1,48 @@ +package config_test + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/config" +) + +// ResolveChatTemplateKwargs layers the model config map (base) under the coerced +// backend metadata (server reasoning levers + client request overrides). +var _ = Describe("ModelConfig.ResolveChatTemplateKwargs", func() { + It("returns nil when nothing is set", func() { + c := &config.ModelConfig{} + Expect(c.ResolveChatTemplateKwargs(nil)).To(BeNil()) + }) + + It("returns the config map when no metadata is present", func() { + c := &config.ModelConfig{ChatTemplateKwargs: map[string]any{"preserve_thinking": true}} + Expect(c.ResolveChatTemplateKwargs(nil)).To(HaveKeyWithValue("preserve_thinking", true)) + }) + + It("lets metadata override the config map", func() { + c := &config.ModelConfig{ChatTemplateKwargs: map[string]any{"enable_thinking": true}} + got := c.ResolveChatTemplateKwargs(map[string]string{"enable_thinking": "false"}) + Expect(got).To(HaveKeyWithValue("enable_thinking", false)) + }) + + It("coerces true/false to bool and leaves other strings as-is", func() { + c := &config.ModelConfig{} + got := c.ResolveChatTemplateKwargs(map[string]string{ + "enable_thinking": "true", + "reasoning_effort": "high", + }) + Expect(got).To(HaveKeyWithValue("enable_thinking", true)) + Expect(got).To(HaveKeyWithValue("reasoning_effort", "high")) + }) + + It("skips the reserved chat_template_kwargs metadata key but keeps siblings", func() { + c := &config.ModelConfig{} + got := c.ResolveChatTemplateKwargs(map[string]string{ + "chat_template_kwargs": "{\"x\":1}", + "preserve_thinking": "true", + }) + Expect(got).ToNot(HaveKey("chat_template_kwargs")) + Expect(got).To(HaveKeyWithValue("preserve_thinking", true)) + }) +}) diff --git a/core/config/model_config.go b/core/config/model_config.go index 755280cc3..955a7a6ec 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -70,6 +70,19 @@ type ModelConfig struct { // (Harmony) or LFM2.5 — honor it; "none" also toggles enable_thinking off. ReasoningEffort string `yaml:"reasoning_effort,omitempty" json:"reasoning_effort,omitempty"` + // ChatTemplateKwargs are arbitrary key/values forwarded to the backend's jinja + // chat template via chat_template_kwargs (e.g. preserve_thinking: true). The + // server-derived reasoning levers (enable_thinking / reasoning_effort) and any + // per-request metadata overrides layer on top. See gRPCPredictOpts. + ChatTemplateKwargs map[string]any `yaml:"chat_template_kwargs,omitempty" json:"chat_template_kwargs,omitempty"` + + // RequestMetadata holds the raw client request `metadata` map for the current + // request. The request middleware stamps it; gRPCPredictOpts merges it into the + // backend gRPC metadata (overriding the server-derived enable_thinking / + // reasoning_effort) and folds it, coerced, into the chat_template_kwargs blob. + // Never persisted to YAML. + RequestMetadata map[string]string `yaml:"-" json:"-"` + FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early. // LLM configs (GPT4ALL, Llama.cpp, ...) LLMConfig `yaml:",inline" json:",inline"` @@ -551,6 +564,44 @@ func (c *ModelConfig) ApplyReasoningEffort(requestEffort string) { } } +// coerceChatTemplateKwarg coerces a request-metadata string value for use as a +// jinja chat_template_kwarg. "true"/"false" become real booleans (so a jinja +// `{% if preserve_thinking %}` reads false correctly, since any non-empty string +// is truthy); everything else stays a string. Numeric/typed per-request values are +// out of scope - set those in the model YAML chat_template_kwargs (YAML keeps the type). +func coerceChatTemplateKwarg(v string) any { + switch v { + case "true": + return true + case "false": + return false + default: + return v + } +} + +// ResolveChatTemplateKwargs builds the final chat_template_kwargs map forwarded to +// the backend, layered: the model config map (base) < the coerced backend metadata +// (server reasoning levers + client request overrides). `meta` is the already-merged +// backend metadata string map. The reserved "chat_template_kwargs" key is skipped so +// a client cannot smuggle a nested blob. Returns nil when there is nothing to forward. +func (c *ModelConfig) ResolveChatTemplateKwargs(meta map[string]string) map[string]any { + out := map[string]any{} + for k, v := range c.ChatTemplateKwargs { + out[k] = v + } + for k, v := range meta { + if k == "chat_template_kwargs" { + continue + } + out[k] = coerceChatTemplateKwarg(v) + } + if len(out) == 0 { + return nil + } + return out +} + // @Description PipelineStreaming toggles incremental delivery per realtime stage. type PipelineStreaming struct { LLM *bool `yaml:"llm,omitempty" json:"llm,omitempty"`