From 20ed0bc7354fae6c8f4926a05eb6bb0e36033a23 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jun 2026 21:57:23 +0000 Subject: [PATCH] fix(gallery): match mmproj/model quant as a whole token so F16 no longer selects BF16 (#10559) pickPreferredGroup matched a quant preference against the shard base filename with strings.Contains. Because `f16` is a substring of `bf16`, a request for the `F16` mmproj quant was wrongly satisfied by a `BF16` file, which was then selected when its group came first. Match the preference as a whole token instead: it must be delimited by a non-alphanumeric character (or the string start/end) on both outer edges. Separators inside the preference itself (e.g. `ud-q4_k_xl`) are left untouched, and all occurrences are scanned before rejecting. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] --- core/gallery/importers/llama-cpp.go | 39 +++++++++- core/gallery/importers/llama-cpp_test.go | 98 ++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 3 deletions(-) diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go index 39a732560..5797e6352 100644 --- a/core/gallery/importers/llama-cpp.go +++ b/core/gallery/importers/llama-cpp.go @@ -25,8 +25,8 @@ var ( type LlamaCPPImporter struct{} -func (i *LlamaCPPImporter) Name() string { return "llama-cpp" } -func (i *LlamaCPPImporter) Modality() string { return "text" } +func (i *LlamaCPPImporter) Name() string { return "llama-cpp" } +func (i *LlamaCPPImporter) Modality() string { return "text" } func (i *LlamaCPPImporter) AutoDetects() bool { return true } // AdditionalBackends advertises drop-in replacements that share the @@ -293,7 +293,7 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG for _, pref := range prefs { lower := strings.ToLower(pref) for i := range groups { - if strings.Contains(strings.ToLower(groups[i].Base), lower) { + if quantTokenMatches(strings.ToLower(groups[i].Base), lower) { 
return &groups[i] } } @@ -301,6 +301,39 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG return &groups[len(groups)-1] } +// quantTokenMatches reports whether pref appears in base as a whole token +// rather than as a substring of a larger alphanumeric run. Both arguments +// must already be lowercased. +// +// A plain strings.Contains is wrong here: `f16` is a substring of `bf16`, so +// asking for the `F16` quant used to wrongly select a `BF16` file (#10559). +// Only the OUTER edges of the matched preference must hit a boundary — a +// non-alphanumeric char (or the start/end of base). Separators inside the +// preference itself (e.g. `ud-q4_k_xl`) are intentionally left untouched. +func quantTokenMatches(base, pref string) bool { + if pref == "" { + return false + } + for start := strings.Index(base, pref); start != -1; { + end := start + len(pref) + leftOK := start == 0 || !isAlphaNum(base[start-1]) + rightOK := end == len(base) || !isAlphaNum(base[end]) + if leftOK && rightOK { + return true + } + next := strings.Index(base[start+1:], pref) + if next == -1 { + break + } + start += next + 1 + } + return false +} + +func isAlphaNum(b byte) bool { + return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9') +} + // maybeApplyMTPDefaults parses the picked GGUF header (range-fetched over // HTTP for HF/URL imports) and, if the file declares a Multi-Token Prediction // head, appends the auto-MTP option keys to modelConfig.Options. Failures diff --git a/core/gallery/importers/llama-cpp_test.go b/core/gallery/importers/llama-cpp_test.go index f141fc29f..e3f730945 100644 --- a/core/gallery/importers/llama-cpp_test.go +++ b/core/gallery/importers/llama-cpp_test.go @@ -374,6 +374,104 @@ var _ = Describe("LlamaCPPImporter", func() { }) }) + Context("quant token boundary matching", func() { + // Regression for #10559: the quant preference must match as a whole + // token, not as a substring. 
Asking for `F16` used to select a + // `BF16` mmproj because strings.Contains("...bf16.gguf", "f16") is + // true — the leading `b` was ignored. + + const repoBase = "https://huggingface.co/acme/example-GGUF/resolve/main/" + + hfFile := func(path, sha string) hfapi.ModelFile { + return hfapi.ModelFile{ + Path: path, + SHA256: sha, + URL: repoBase + path, + } + } + + withHF := func(preferences string, files ...hfapi.ModelFile) Details { + d := Details{ + URI: "https://huggingface.co/acme/example-GGUF", + HuggingFace: &hfapi.ModelDetails{ + ModelID: "acme/example-GGUF", + Files: files, + }, + } + if preferences != "" { + d.Preferences = json.RawMessage(preferences) + } + return d + } + + It("selects the F16 mmproj over BF16 (BF16 listed first)", func() { + details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`, + hfFile("model-Q4_K_M.gguf", "model"), + hfFile("mmproj-x-BF16.gguf", "bf16"), + hfFile("mmproj-x-F16.gguf", "f16"), + ) + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig)) + Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig)) + }) + + It("selects the F16 mmproj over BF16 (F16 listed first)", func() { + details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`, + hfFile("model-Q4_K_M.gguf", "model"), + hfFile("mmproj-x-F16.gguf", "f16"), + hfFile("mmproj-x-BF16.gguf", "bf16"), + ) + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig)) + Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig)) + }) + + It("selects BF16 when BF16 is the requested mmproj quant", func() { + details := 
withHF(`{"name":"VL","mmproj_quantizations":"BF16"}`, + hfFile("model-Q4_K_M.gguf", "model"), + hfFile("mmproj-x-F16.gguf", "f16"), + hfFile("mmproj-x-BF16.gguf", "bf16"), + ) + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-BF16.gguf"), fmt.Sprintf("%+v", modelConfig)) + }) + + It("still matches a normal model quant with internal separators", func() { + // ud-q4_k_xl contains `-`/`_` internally; only the outer edges + // must hit a token boundary. + details := withHF(`{"name":"M","quantizations":"ud-q4_k_xl"}`, + hfFile("model-UD-Q4_K_XL.gguf", "xl"), + hfFile("model-Q3_K_M.gguf", "q3"), + ) + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-UD-Q4_K_XL.gguf"), fmt.Sprintf("%+v", modelConfig)) + }) + + It("falls back to the last group when no preference matches", func() { + details := withHF(`{"name":"M","quantizations":"Q2_K"}`, + hfFile("model-Q8_0.gguf", "q8"), + hfFile("model-Q3_K_M.gguf", "q3"), + ) + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-Q3_K_M.gguf"), fmt.Sprintf("%+v", modelConfig)) + }) + }) + Context("AdditionalBackends", func() { It("advertises ik-llama-cpp and turboquant as drop-in replacements", func() { entries := importer.AdditionalBackends()