mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-16 04:38:50 -04:00
feat(config): add chat_template_kwargs model field + resolver
Adds the ChatTemplateKwargs model-config map and the RequestMetadata carrier, plus ResolveChatTemplateKwargs, which layers the config map under the coerced request metadata. Foundation for generic jinja chat-template kwargs (issue #10329).
Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
48
core/config/chat_template_kwargs_test.go
Normal file
48
core/config/chat_template_kwargs_test.go
Normal file
@@ -0,0 +1,48 @@
|
||||
package config_test
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// ResolveChatTemplateKwargs layers the model config map (base) under the coerced
|
||||
// backend metadata (server reasoning levers + client request overrides).
|
||||
var _ = Describe("ModelConfig.ResolveChatTemplateKwargs", func() {
|
||||
It("returns nil when nothing is set", func() {
|
||||
c := &config.ModelConfig{}
|
||||
Expect(c.ResolveChatTemplateKwargs(nil)).To(BeNil())
|
||||
})
|
||||
|
||||
It("returns the config map when no metadata is present", func() {
|
||||
c := &config.ModelConfig{ChatTemplateKwargs: map[string]any{"preserve_thinking": true}}
|
||||
Expect(c.ResolveChatTemplateKwargs(nil)).To(HaveKeyWithValue("preserve_thinking", true))
|
||||
})
|
||||
|
||||
It("lets metadata override the config map", func() {
|
||||
c := &config.ModelConfig{ChatTemplateKwargs: map[string]any{"enable_thinking": true}}
|
||||
got := c.ResolveChatTemplateKwargs(map[string]string{"enable_thinking": "false"})
|
||||
Expect(got).To(HaveKeyWithValue("enable_thinking", false))
|
||||
})
|
||||
|
||||
It("coerces true/false to bool and leaves other strings as-is", func() {
|
||||
c := &config.ModelConfig{}
|
||||
got := c.ResolveChatTemplateKwargs(map[string]string{
|
||||
"enable_thinking": "true",
|
||||
"reasoning_effort": "high",
|
||||
})
|
||||
Expect(got).To(HaveKeyWithValue("enable_thinking", true))
|
||||
Expect(got).To(HaveKeyWithValue("reasoning_effort", "high"))
|
||||
})
|
||||
|
||||
It("skips the reserved chat_template_kwargs metadata key but keeps siblings", func() {
|
||||
c := &config.ModelConfig{}
|
||||
got := c.ResolveChatTemplateKwargs(map[string]string{
|
||||
"chat_template_kwargs": "{\"x\":1}",
|
||||
"preserve_thinking": "true",
|
||||
})
|
||||
Expect(got).ToNot(HaveKey("chat_template_kwargs"))
|
||||
Expect(got).To(HaveKeyWithValue("preserve_thinking", true))
|
||||
})
|
||||
})
|
||||
@@ -70,6 +70,19 @@ type ModelConfig struct {
|
||||
// (Harmony) or LFM2.5 — honor it; "none" also toggles enable_thinking off.
|
||||
ReasoningEffort string `yaml:"reasoning_effort,omitempty" json:"reasoning_effort,omitempty"`
|
||||
|
||||
// ChatTemplateKwargs are arbitrary key/values forwarded to the backend's jinja
|
||||
// chat template via chat_template_kwargs (e.g. preserve_thinking: true). The
|
||||
// server-derived reasoning levers (enable_thinking / reasoning_effort) and any
|
||||
// per-request metadata overrides layer on top. See gRPCPredictOpts.
|
||||
ChatTemplateKwargs map[string]any `yaml:"chat_template_kwargs,omitempty" json:"chat_template_kwargs,omitempty"`
|
||||
|
||||
// RequestMetadata holds the raw client request `metadata` map for the current
|
||||
// request. The request middleware stamps it; gRPCPredictOpts merges it into the
|
||||
// backend gRPC metadata (overriding the server-derived enable_thinking /
|
||||
// reasoning_effort) and folds it, coerced, into the chat_template_kwargs blob.
|
||||
// Never persisted to YAML.
|
||||
RequestMetadata map[string]string `yaml:"-" json:"-"`
|
||||
|
||||
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
|
||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||
LLMConfig `yaml:",inline" json:",inline"`
|
||||
@@ -551,6 +564,44 @@ func (c *ModelConfig) ApplyReasoningEffort(requestEffort string) {
|
||||
}
|
||||
}
|
||||
|
||||
// coerceChatTemplateKwarg converts a request-metadata string value into the value
// used as a jinja chat_template_kwarg.
//
// The exact strings "true" and "false" become real booleans, so that a jinja
// `{% if preserve_thinking %}` reads false correctly (any non-empty string is
// truthy in jinja). Matching is exact and case-sensitive; every other input,
// including the empty string, is returned unchanged as a string.
//
// Numeric/typed per-request values are out of scope - set those in the model YAML
// chat_template_kwargs (YAML keeps the type).
func coerceChatTemplateKwarg(v string) any {
	switch v {
	case "true":
		return true
	case "false":
		return false
	default:
		return v
	}
}
|
||||
|
||||
// ResolveChatTemplateKwargs builds the final chat_template_kwargs map forwarded to
|
||||
// the backend, layered: the model config map (base) < the coerced backend metadata
|
||||
// (server reasoning levers + client request overrides). `meta` is the already-merged
|
||||
// backend metadata string map. The reserved "chat_template_kwargs" key is skipped so
|
||||
// a client cannot smuggle a nested blob. Returns nil when there is nothing to forward.
|
||||
func (c *ModelConfig) ResolveChatTemplateKwargs(meta map[string]string) map[string]any {
|
||||
out := map[string]any{}
|
||||
for k, v := range c.ChatTemplateKwargs {
|
||||
out[k] = v
|
||||
}
|
||||
for k, v := range meta {
|
||||
if k == "chat_template_kwargs" {
|
||||
continue
|
||||
}
|
||||
out[k] = coerceChatTemplateKwarg(v)
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// @Description PipelineStreaming toggles incremental delivery per realtime stage.
|
||||
type PipelineStreaming struct {
|
||||
LLM *bool `yaml:"llm,omitempty" json:"llm,omitempty"`
|
||||
|
||||
Reference in New Issue
Block a user