feat(backend): forward resolved chat_template_kwargs blob to backends

gRPCPredictOpts now merges per-request client metadata over the server-derived
enable_thinking/reasoning_effort (reaching all backends via the standalone keys)
and serialises the resolved chat_template_kwargs map into a JSON blob for
llama.cpp, written last so a client cannot clobber it. Issue #10329.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-06-16 07:51:06 +00:00
parent bc8a1be801
commit acf4f5add3
2 changed files with 70 additions and 0 deletions

View File

@@ -368,6 +368,25 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
if c.ReasoningEffort != "" {
metadata["reasoning_effort"] = c.ReasoningEffort
}
// Client request metadata overrides the server-derived reasoning levers and
// reaches every backend through these standalone string keys (Python backends
// read them directly). The reserved blob key is server-owned and skipped.
for k, v := range c.RequestMetadata {
if k == "chat_template_kwargs" {
continue
}
metadata[k] = v
}
// Build the generic chat_template_kwargs blob (model config map + coerced
// metadata) for llama.cpp and write it LAST so a client cannot clobber it.
if blob := c.ResolveChatTemplateKwargs(metadata); len(blob) > 0 {
b, err := json.Marshal(blob)
if err != nil {
xlog.Warn("failed to marshal chat_template_kwargs", "error", err)
} else {
metadata["chat_template_kwargs"] = string(b)
}
}
pbOpts.Metadata = metadata
// Logprobs and TopLogprobs are set by the caller if provided

View File

@@ -161,3 +161,54 @@ var _ = Describe("grpcModelOpts NBatch", func() {
Expect(opts.ContextSize).To(BeEquivalentTo(4096), "n_batch must match the effective n_ctx the backend receives")
})
})
// Guards the generic chat_template_kwargs forwarding: the model config map plus any
// per-request metadata overrides are merged, coerced, and serialised into the
// backend metadata blob that llama.cpp reads. Client metadata also overrides the
// server-derived standalone enable_thinking key (cross-backend consistency).
var _ = Describe("gRPCPredictOpts chat_template_kwargs metadata", func() {
	// blobKey is the metadata key under which the JSON-encoded
	// chat_template_kwargs blob is expected.
	const blobKey = "chat_template_kwargs"

	// baseCfg returns an otherwise empty ModelConfig with SetDefaults applied, so
	// each spec starts from the same baseline and only sets the fields it exercises.
	baseCfg := func() config.ModelConfig {
		cfg := config.ModelConfig{}
		cfg.SetDefaults()
		return cfg
	}

	// decodeBlob asserts that the blob key is present in metadata and that its
	// value is valid JSON, then returns the decoded object. Checking presence
	// first turns a missing key into an explicit "expected key" failure instead
	// of a JSON parse error on an empty string. The offset of 1 attributes
	// failures to the calling spec rather than to this helper.
	decodeBlob := func(metadata map[string]string) map[string]any {
		ExpectWithOffset(1, metadata).To(HaveKey(blobKey))
		var blob map[string]any
		ExpectWithOffset(1, json.Unmarshal([]byte(metadata[blobKey]), &blob)).To(Succeed())
		return blob
	}

	It("serialises the config map into the chat_template_kwargs blob", func() {
		cfg := baseCfg()
		cfg.ChatTemplateKwargs = map[string]any{"preserve_thinking": true}

		opts := gRPCPredictOpts(cfg, "/tmp/models")

		Expect(decodeBlob(opts.Metadata)).To(HaveKeyWithValue("preserve_thinking", true))
	})

	It("lets client request metadata override the server-derived enable_thinking key", func() {
		cfg := baseCfg()
		disable := true
		cfg.ReasoningConfig = reasoning.Config{DisableReasoning: &disable} // server: enable_thinking=false
		cfg.RequestMetadata = map[string]string{"enable_thinking": "true"} // client overrides

		opts := gRPCPredictOpts(cfg, "/tmp/models")

		// standalone key (Python backends) reflects the client override
		Expect(opts.Metadata).To(HaveKeyWithValue("enable_thinking", "true"))
		// blob (llama.cpp) reflects it too, as a real bool
		Expect(decodeBlob(opts.Metadata)).To(HaveKeyWithValue("enable_thinking", true))
	})

	It("does not let a client clobber the blob via a chat_template_kwargs metadata key", func() {
		cfg := baseCfg()
		cfg.ChatTemplateKwargs = map[string]any{"preserve_thinking": true}
		// The client tries to inject its own blob under the reserved key.
		cfg.RequestMetadata = map[string]string{blobKey: "{\"preserve_thinking\": false}"}

		opts := gRPCPredictOpts(cfg, "/tmp/models")

		// The value from the model config must survive, not the client-supplied one.
		Expect(decodeBlob(opts.Metadata)).To(HaveKeyWithValue("preserve_thinking", true))
	})

	It("omits the blob when there is nothing to forward", func() {
		opts := gRPCPredictOpts(baseCfg(), "/tmp/models")

		Expect(opts.Metadata).ToNot(HaveKey(blobKey))
	})
})