fix(gallery): match mmproj/model quant as a whole token so F16 no longer selects BF16 (#10559) (#10564)

pickPreferredGroup matched a quant preference against the shard base filename with strings.Contains. Because `f16` is a substring of `bf16`, asking for the `F16` mmproj quant would wrongly match a `BF16` file and select it when its group came first.

Match the preference as a whole token instead: it must be delimited by a non-alphanumeric character (or the string start/end) on both outer edges. Separators inside the preference itself (e.g. `ud-q4_k_xl`) are left untouched, and all occurrences are scanned before rejecting.

Assisted-by: Claude:claude-opus-4-8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-28 10:27:30 -04:00 · 2026-06-28 01:21:33 +02:00
parent fdff114701
commit f1fcafb888
2 changed files with 134 additions and 3 deletions
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -25,8 +25,8 @@ var (

 type LlamaCPPImporter struct{}

-func (i *LlamaCPPImporter) Name() string     { return "llama-cpp" }
-func (i *LlamaCPPImporter) Modality() string { return "text" }
+func (i *LlamaCPPImporter) Name() string      { return "llama-cpp" }
+func (i *LlamaCPPImporter) Modality() string  { return "text" }
 func (i *LlamaCPPImporter) AutoDetects() bool { return true }

 // AdditionalBackends advertises drop-in replacements that share the
@@ -293,7 +293,7 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG
 	for _, pref := range prefs {
 		lower := strings.ToLower(pref)
 		for i := range groups {
-			if strings.Contains(strings.ToLower(groups[i].Base), lower) {
+			if quantTokenMatches(strings.ToLower(groups[i].Base), lower) {
 				return &groups[i]
 			}
 		}
@@ -301,6 +301,39 @@ func pickPreferredGroup(groups []hfapi.ShardGroup, prefs []string) *hfapi.ShardG
 	return &groups[len(groups)-1]
 }

+// quantTokenMatches reports whether pref appears in base as a whole token
+// rather than as a substring of a larger alphanumeric run. Both arguments
+// must already be lowercased.
+//
+// A plain strings.Contains is wrong here: `f16` is a substring of `bf16`, so
+// asking for the `F16` quant used to wrongly select a `BF16` file (#10559).
+// Only the OUTER edges of the matched preference must hit a boundary — a
+// non-alphanumeric char (or the start/end of base). Separators inside the
+// preference itself (e.g. `ud-q4_k_xl`) are intentionally left untouched.
+func quantTokenMatches(base, pref string) bool {
+	if pref == "" {
+		return false
+	}
+	for start := strings.Index(base, pref); start != -1; {
+		end := start + len(pref)
+		leftOK := start == 0 || !isAlphaNum(base[start-1])
+		rightOK := end == len(base) || !isAlphaNum(base[end])
+		if leftOK && rightOK {
+			return true
+		}
+		next := strings.Index(base[start+1:], pref)
+		if next == -1 {
+			break
+		}
+		start += next + 1
+	}
+	return false
+}
+
+func isAlphaNum(b byte) bool {
+	return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9')
+}
+
 // maybeApplyMTPDefaults parses the picked GGUF header (range-fetched over
 // HTTP for HF/URL imports) and, if the file declares a Multi-Token Prediction
 // head, appends the auto-MTP option keys to modelConfig.Options. Failures
--- a/core/gallery/importers/llama-cpp_test.go
+++ b/core/gallery/importers/llama-cpp_test.go
@@ -374,6 +374,104 @@ var _ = Describe("LlamaCPPImporter", func() {
 		})
 	})

+	Context("quant token boundary matching", func() {
+		// Regression for #10559: the quant preference must match as a whole
+		// token, not as a substring. Asking for `F16` used to select a
+		// `BF16` mmproj because strings.Contains("...bf16.gguf", "f16") is
+		// true — the leading `b` was ignored.
+
+		const repoBase = "https://huggingface.co/acme/example-GGUF/resolve/main/"
+
+		hfFile := func(path, sha string) hfapi.ModelFile {
+			return hfapi.ModelFile{
+				Path:   path,
+				SHA256: sha,
+				URL:    repoBase + path,
+			}
+		}
+
+		withHF := func(preferences string, files ...hfapi.ModelFile) Details {
+			d := Details{
+				URI: "https://huggingface.co/acme/example-GGUF",
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "acme/example-GGUF",
+					Files:   files,
+				},
+			}
+			if preferences != "" {
+				d.Preferences = json.RawMessage(preferences)
+			}
+			return d
+		}
+
+		It("selects the F16 mmproj over BF16 (BF16 listed first)", func() {
+			details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`,
+				hfFile("model-Q4_K_M.gguf", "model"),
+				hfFile("mmproj-x-BF16.gguf", "bf16"),
+				hfFile("mmproj-x-F16.gguf", "f16"),
+			)
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig))
+			Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig))
+		})
+
+		It("selects the F16 mmproj over BF16 (F16 listed first)", func() {
+			details := withHF(`{"name":"VL","mmproj_quantizations":"F16"}`,
+				hfFile("model-Q4_K_M.gguf", "model"),
+				hfFile("mmproj-x-F16.gguf", "f16"),
+				hfFile("mmproj-x-BF16.gguf", "bf16"),
+			)
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-F16.gguf"), fmt.Sprintf("%+v", modelConfig))
+			Expect(modelConfig.ConfigFile).ToNot(ContainSubstring("BF16"), fmt.Sprintf("%+v", modelConfig))
+		})
+
+		It("selects BF16 when BF16 is the requested mmproj quant", func() {
+			details := withHF(`{"name":"VL","mmproj_quantizations":"BF16"}`,
+				hfFile("model-Q4_K_M.gguf", "model"),
+				hfFile("mmproj-x-F16.gguf", "f16"),
+				hfFile("mmproj-x-BF16.gguf", "bf16"),
+			)
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/VL/mmproj-x-BF16.gguf"), fmt.Sprintf("%+v", modelConfig))
+		})
+
+		It("still matches a normal model quant with internal separators", func() {
+			// ud-q4_k_xl contains `-`/`_` internally; only the outer edges
+			// must hit a token boundary.
+			details := withHF(`{"name":"M","quantizations":"ud-q4_k_xl"}`,
+				hfFile("model-UD-Q4_K_XL.gguf", "xl"),
+				hfFile("model-Q3_K_M.gguf", "q3"),
+			)
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-UD-Q4_K_XL.gguf"), fmt.Sprintf("%+v", modelConfig))
+		})
+
+		It("falls back to the last group when no preference matches", func() {
+			details := withHF(`{"name":"M","quantizations":"Q2_K"}`,
+				hfFile("model-Q8_0.gguf", "q8"),
+				hfFile("model-Q3_K_M.gguf", "q3"),
+			)
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/M/model-Q3_K_M.gguf"), fmt.Sprintf("%+v", modelConfig))
+		})
+	})
+
 	Context("AdditionalBackends", func() {
 		It("advertises ik-llama-cpp and turboquant as drop-in replacements", func() {
 			entries := importer.AdditionalBackends()