From 73cfedc0238eb4123545db96292f61c3f0f10089 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 29 May 2026 10:12:53 +0200 Subject: [PATCH] fix: tool-call JSON leaks into content with stream+tools on tokenizer-template models (#10052) (#10057) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(grammars): honor properties_order entry at index 0 The JSON-schema-to-GBNF property sort used `aOrder != 0 && bOrder != 0` as its "is this key ordered?" guard. That treats index 0 — the first key listed in properties_order — as unset, so `properties_order: name,arguments` fell back to alphabetical ordering and still emitted "arguments" before "name". Use presence in the order map instead: listed keys sort by their index and ahead of unlisted keys, which keep a stable alphabetical order. This makes the documented `properties_order: name,arguments` actually produce name-first tool-call JSON. Relates to #10052. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] * fix(functions): defer tool grammar to the backend when the tokenizer template owns templating (#10052) When use_tokenizer_template delegates templating to the backend (llama.cpp), the backend also owns tool-call grammar generation and parsing. LocalAI was still generating its own GBNF grammar and sending it down. With a grammar present, llama.cpp does not hand the tools to its template, so its native peg/json tool parser never engages: it streams the grammar-constrained tool-call JSON back as plain content instead of emitting tool_calls. In streaming mode the JSON object leaked into the content field, and the Go-side incremental detector never gated content because the LocalAI-generated grammar emitted "arguments" before "name". The GGUF auto-import path already couples use_tokenizer_template with grammar.disable, but that block is skipped when a template is already configured, so gallery and hand-written configs (e.g. qwen3) that set the tokenizer template directly never got the paired grammar.disable. - SetDefaults now enforces the coupling for every config: when use_tokenizer_template is set, grammar generation is disabled and tools flow to the backend's native (name-first) pipeline. This also fixes already-installed models without editing each config. - Set function.grammar.disable in the shared gallery/qwen3.yaml, which is the base config referenced by every qwen3 gallery entry. Verified end to end against qwen3-4b with stream:true + tools: content no longer carries the tool-call JSON, reasoning is classified separately, and tool calls stream as proper name-first tool_calls deltas. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] --------- Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- core/config/model_config.go | 11 ++++ core/config/model_config_test.go | 29 +++++++++++ gallery/qwen3.yaml | 7 +++ pkg/functions/grammars/json_schema.go | 18 +++++-- pkg/functions/grammars/json_schema_test.go | 58 ++++++++++++++++++++++ 5 files changed, 119 insertions(+), 4 deletions(-) diff --git a/core/config/model_config.go b/core/config/model_config.go index 727d12f7c..d57544c6f 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -732,6 +732,17 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) { cfg.Proxy.Mode = ProxyModePassthrough } + // When templating is delegated to the backend (use_tokenizer_template), + // the backend also owns tool-call grammar generation and parsing. Sending + // a LocalAI-generated grammar alongside overrides the backend's native + // (name-first) tool pipeline and makes it stream the tool-call JSON back as + // plain content (issue #10052). The GGUF auto-import path already couples + // these two flags; enforce it here so gallery and hand-written configs that + // set use_tokenizer_template directly stay consistent. + if cfg.TemplateConfig.UseTokenizerTemplate { + cfg.FunctionsConfig.GrammarConfig.NoGrammar = true + } + // Apply model-family-specific inference defaults before generic fallbacks. // This ensures gallery-installed and runtime-loaded models get optimal parameters. ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model) diff --git a/core/config/model_config_test.go b/core/config/model_config_test.go index d1b7abae9..a93912e1b 100644 --- a/core/config/model_config_test.go +++ b/core/config/model_config_test.go @@ -471,4 +471,33 @@ concurrency_groups: Expect(configs[0].GetConcurrencyGroups()).To(Equal([]string{"vram-heavy", "120b"})) }) }) + + // When templating is delegated to the backend (use_tokenizer_template), + // the backend also owns tool-call grammar generation and parsing. A + // LocalAI-generated grammar sent alongside would override the backend's + // native (name-first) tool pipeline and make it stream the tool-call JSON + // back as plain content (issue #10052). SetDefaults must therefore couple + // the two: tokenizer template implies grammar generation is disabled. + Context("use_tokenizer_template couples with grammar disable (issue #10052)", func() { + It("disables Go grammar generation when the tokenizer template is used", func() { + cfg := &ModelConfig{ + TemplateConfig: TemplateConfig{UseTokenizerTemplate: true}, + } + Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeFalse()) + + cfg.SetDefaults() + + Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeTrue(), + "use_tokenizer_template must imply grammar.disable so tools go to the backend's native pipeline") + }) + + It("leaves grammar generation enabled when the tokenizer template is not used", func() { + cfg := &ModelConfig{} + + cfg.SetDefaults() + + Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeFalse(), + "models that template in Go still rely on the Go-generated grammar") + }) + }) }) diff --git a/gallery/qwen3.yaml b/gallery/qwen3.yaml index f09ee0e35..62bae155a 100644 --- a/gallery/qwen3.yaml +++ b/gallery/qwen3.yaml @@ -17,6 +17,13 @@ config_file: | # "pure content" PEG parser that leaks reasoning tags into content. options: - use_jinja:true + # With use_tokenizer_template the backend (llama.cpp) owns tool-call + # grammar generation and parsing too. Disabling LocalAI's own grammar lets + # llama.cpp's native name-first tool pipeline run; otherwise the generated + # grammar overrides it and the tool-call JSON leaks into content (#10052). + function: + grammar: + disable: true template: use_tokenizer_template: true name: qwen3 diff --git a/pkg/functions/grammars/json_schema.go b/pkg/functions/grammars/json_schema.go index b9bf73759..903e3be66 100644 --- a/pkg/functions/grammars/json_schema.go +++ b/pkg/functions/grammars/json_schema.go @@ -155,12 +155,22 @@ func (sc *JSONSchemaConverter) visit(schema map[string]any, name string, rootSch propName string propSchema map[string]any }) int { - aOrder := propOrder[a.propName] - bOrder := propOrder[b.propName] - if aOrder != 0 && bOrder != 0 { + // Use presence in the order map (not a non-zero sentinel) so that + // the first listed key — index 0 — is honored. Keys present in + // properties_order sort by their index and ahead of any key that + // isn't listed; unlisted keys keep a stable alphabetical order. + aOrder, aOK := propOrder[a.propName] + bOrder, bOK := propOrder[b.propName] + switch { + case aOK && bOK: return cmp.Compare(aOrder, bOrder) + case aOK: + return -1 + case bOK: + return 1 + default: + return cmp.Compare(a.propName, b.propName) } - return cmp.Compare(a.propName, b.propName) }) var rule strings.Builder diff --git a/pkg/functions/grammars/json_schema_test.go b/pkg/functions/grammars/json_schema_test.go index b53e310b7..c371b958a 100644 --- a/pkg/functions/grammars/json_schema_test.go +++ b/pkg/functions/grammars/json_schema_test.go @@ -547,3 +547,61 @@ realvalue }) }) }) + +var _ = Describe("JSON schema property ordering (issue #10052)", func() { + // A function-call shaped schema. The grammar must honor the configured + // properties_order. Before the fix, the sort guard `aOrder != 0 && bOrder != 0` + // treated the first listed key (index 0) as "unset" and fell back to + // alphabetical order, so "arguments" was emitted before "name" even when + // properties_order put name first. + const schema = `{ + "type": "object", + "properties": { + "name": {"type": "string"}, + "arguments": {"type": "object", "properties": {"cmd": {"type": "string"}}} + } + }` + + // keyIndex finds the position of an object-key literal (escaped as \"key\" + // in GBNF), which only appears where the key is emitted in the rule — not + // in derived rule names like root-name. + keyIndex := func(grammar, key string) int { + return strings.Index(grammar, `\"`+key+`\"`) + } + + It("honors properties_order with name listed first (index 0)", func() { + grammar, err := NewJSONSchemaConverter("name,arguments").GrammarFromBytes([]byte(schema)) + Expect(err).To(BeNil()) + ni := keyIndex(grammar, "name") + ai := keyIndex(grammar, "arguments") + Expect(ni).To(BeNumerically(">=", 0)) + Expect(ai).To(BeNumerically(">=", 0)) + Expect(ni).To(BeNumerically("<", ai), + "properties_order lists name first, so the grammar must emit \"name\" before \"arguments\"") + }) + + It("keeps alphabetical order when properties_order is empty", func() { + grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(schema)) + Expect(err).To(BeNil()) + // No explicit order: keys fall back to alphabetical, so "arguments" + // precedes "name". This is the documented default and must not change. + Expect(keyIndex(grammar, "arguments")).To(BeNumerically("<", keyIndex(grammar, "name"))) + }) + + It("sorts keys present in properties_order ahead of unlisted keys", func() { + const schemaWithExtra = `{ + "type": "object", + "properties": { + "name": {"type": "string"}, + "arguments": {"type": "object", "properties": {"cmd": {"type": "string"}}}, + "aaa_unlisted": {"type": "string"} + } + }` + // "aaa_unlisted" is alphabetically first but not in the order list, so + // it must still come after the listed name/arguments keys. + grammar, err := NewJSONSchemaConverter("name,arguments").GrammarFromBytes([]byte(schemaWithExtra)) + Expect(err).To(BeNil()) + Expect(keyIndex(grammar, "name")).To(BeNumerically("<", keyIndex(grammar, "arguments"))) + Expect(keyIndex(grammar, "arguments")).To(BeNumerically("<", keyIndex(grammar, "aaa_unlisted"))) + }) +})