From 8452068f4362a09aac2962db591268f799b90fec Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 1 May 2026 12:03:07 +0200 Subject: [PATCH] feat(importers): whisper.cpp HF repos pick a quant + nest under whisper/models (#9630) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WhisperImporter's Import() switch ordered LooksLikeURL ahead of the HuggingFace branch, so any https://huggingface.co/<owner>/<repo> URI (e.g. LocalAI-io/whisper-large-v3-it-yodas-only-ggml) hijacked the URL path. FilenameFromUrl returned the repo slug, the gallery entry pointed at the HTML repo page, the SHA256 was empty, and the HF file listing was effectively dead code for HTTPS imports. The HF branch only fired for huggingface://owner/repo and hf://owner/repo references. Gate the URL case on a "ggml-*.bin" basename signal — mirroring how the llama-cpp importer gates on ".gguf" — so direct file URLs still take the URL path while HF repo URLs fall through to the HF branch. There the file listing is actually consulted: every ggml-*.bin entry is collected and one is picked by the new preferences.quantizations preference (default q5_0; comma-separated for fallback ordering). Pin the chosen file under whisper/models/<name>/ so a single repo can ship q4_0/q5_0/q8_0 side-by-side without colliding on disk, matching the llama-cpp/models/<name>/ layout. The fallback when no preference matches is the last available ggml file, mirroring llama-cpp's pickPreferredGroup behaviour. Tests: replace the previous probe spec with positive assertions against LocalAI-io/whisper-large-v3-it-yodas-only-ggml (default → ggml-model-q5_0.bin, quantizations=q4_0 → ggml-model-q4_0.bin) plus two offline specs that build a fake hfapi.ModelDetails to cover the fallback rule and non-ggml filtering without touching the network. 
Assisted-by: Claude:claude-opus-4-7 [Bash Read Edit WebFetch] Signed-off-by: Ettore Di Giacinto --- core/gallery/importers/whisper.go | 71 +++++++++++++---- core/gallery/importers/whisper_test.go | 101 +++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 16 deletions(-) diff --git a/core/gallery/importers/whisper.go b/core/gallery/importers/whisper.go index 8c46363ed..49007be98 100644 --- a/core/gallery/importers/whisper.go +++ b/core/gallery/importers/whisper.go @@ -9,6 +9,7 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" "go.yaml.in/yaml/v2" ) @@ -42,8 +43,7 @@ func (i *WhisperImporter) Match(details Details) bool { } // Direct URL or path ending in ggml-*.bin - base := filepath.Base(details.URI) - if strings.HasPrefix(base, "ggml-") && strings.HasSuffix(strings.ToLower(base), ".bin") { + if isGGMLFilename(filepath.Base(details.URI)) { return true } @@ -76,6 +76,12 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) { description = "Imported from " + details.URI } + preferredQuants, _ := preferencesMap["quantizations"].(string) + quants := []string{"q5_0"} + if preferredQuants != "" { + quants = strings.Split(preferredQuants, ",") + } + cfg := gallery.ModelConfig{ Name: name, Description: description, @@ -89,37 +95,43 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) { } uri := downloader.URI(details.URI) + directGGML := isGGMLFilename(filepath.Base(details.URI)) switch { - case uri.LooksLikeURL(): + case uri.LooksLikeURL() && directGGML: + // Direct file URL (e.g. .../resolve/main/ggml-base.en.bin). We + // already know the exact file the user wants — no quant pick. 
fileName, err := uri.FilenameFromUrl() if err != nil { return gallery.ModelConfig{}, err } + target := filepath.Join("whisper", "models", name, fileName) cfg.Files = append(cfg.Files, gallery.File{ URI: details.URI, - Filename: fileName, + Filename: target, }) modelConfig.PredictionOptions = schema.PredictionOptions{ - BasicModelRequest: schema.BasicModelRequest{Model: fileName}, + BasicModelRequest: schema.BasicModelRequest{Model: target}, } case details.HuggingFace != nil: + // HF repo: collect every ggml-*.bin, pick the preferred quant + // (default q5_0), nest under whisper/models// so the same + // repo can ship multiple quants without colliding on disk. + var ggmlFiles []hfapi.ModelFile for _, f := range details.HuggingFace.Files { - base := filepath.Base(f.Path) - if !strings.HasPrefix(base, "ggml-") { - continue - } - if !strings.HasSuffix(strings.ToLower(base), ".bin") { - continue + if isGGMLFilename(filepath.Base(f.Path)) { + ggmlFiles = append(ggmlFiles, f) } + } + if chosen, ok := pickPreferredGGMLFile(ggmlFiles, quants); ok { + target := filepath.Join("whisper", "models", name, filepath.Base(chosen.Path)) cfg.Files = append(cfg.Files, gallery.File{ - URI: f.URL, - Filename: base, - SHA256: f.SHA256, + URI: chosen.URL, + Filename: target, + SHA256: chosen.SHA256, }) modelConfig.PredictionOptions = schema.PredictionOptions{ - BasicModelRequest: schema.BasicModelRequest{Model: base}, + BasicModelRequest: schema.BasicModelRequest{Model: target}, } - break } default: // Bare URI with no HF metadata (pref-only path). Point the config at @@ -137,3 +149,30 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) { return cfg, nil } + +// isGGMLFilename returns true when name follows whisper.cpp's "ggml-*.bin" +// packaging convention. The .bin check is case-insensitive; the ggml- prefix +// is exact. 
+func isGGMLFilename(name string) bool { + return strings.HasPrefix(name, "ggml-") && strings.HasSuffix(strings.ToLower(name), ".bin") +} + +// pickPreferredGGMLFile walks prefs in order and returns the first ggml file +// whose basename contains any preference token (case-insensitive match on the +// quant suffix, e.g. "q5_0"). When no preference matches, falls back to the +// last file — mirroring llama-cpp's pickPreferredGroup behaviour so a missing +// quant still yields *something* the user can run. +func pickPreferredGGMLFile(files []hfapi.ModelFile, prefs []string) (hfapi.ModelFile, bool) { + if len(files) == 0 { + return hfapi.ModelFile{}, false + } + for _, pref := range prefs { + lower := strings.ToLower(pref) + for _, f := range files { + if strings.Contains(strings.ToLower(filepath.Base(f.Path)), lower) { + return f, true + } + } + } + return files[len(files)-1], true +} diff --git a/core/gallery/importers/whisper_test.go b/core/gallery/importers/whisper_test.go index 03ee07aa0..48c5d452f 100644 --- a/core/gallery/importers/whisper_test.go +++ b/core/gallery/importers/whisper_test.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/mudler/LocalAI/core/gallery/importers" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -36,6 +37,106 @@ var _ = Describe("WhisperImporter", func() { }) }) + // Real-world repo that ships *multiple* ggml-*.bin quantizations + // (ggml-model-q4_0.bin, ggml-model-q5_0.bin, ggml-model-q8_0.bin). + // We assert the importer (a) follows the HF metadata branch — not the + // URL branch — when given the repo URL, (b) lays files out under + // whisper/models// like llama-cpp does, and (c) honours the + // quantizations preference, defaulting to q5_0. 
+ Context("real-world multi-quant repo: LocalAI-io/whisper-large-v3-it-yodas-only-ggml", func() { + const ( + uri = "https://huggingface.co/LocalAI-io/whisper-large-v3-it-yodas-only-ggml" + name = "whisper-large-v3-it-yodas-only-ggml" + ) + + It("defaults to q5_0 and nests the file under whisper/models//", func() { + modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{}`)) + + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: whisper")) + Expect(modelConfig.ConfigFile).To(ContainSubstring("transcript")) + + Expect(modelConfig.Files).To(HaveLen(1), fmt.Sprintf("Model config: %+v", modelConfig)) + + expectedPath := "whisper/models/" + name + "/ggml-model-q5_0.bin" + Expect(modelConfig.Files[0].Filename).To(Equal(expectedPath)) + Expect(modelConfig.Files[0].URI).To(Equal(uri + "/resolve/main/ggml-model-q5_0.bin")) + Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), "HF metadata should provide a sha256") + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: " + expectedPath)) + }) + + It("honours preferences.quantizations=q4_0 to pick ggml-model-q4_0.bin", func() { + modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{"quantizations":"q4_0"}`)) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Files).To(HaveLen(1)) + + expectedPath := "whisper/models/" + name + "/ggml-model-q4_0.bin" + Expect(modelConfig.Files[0].Filename).To(Equal(expectedPath)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: " + expectedPath)) + }) + }) + + Context("Import from HuggingFace file listing (offline)", func() { + // Mirror of llama-cpp_test.go's offline HF context: build a fake + // *hfapi.ModelDetails and assert the emitted gallery entry without + // touching the network. 
+ const repoBase = "https://huggingface.co/acme/example-ggml/resolve/main/" + + hfFile := func(path, sha string) hfapi.ModelFile { + return hfapi.ModelFile{ + Path: path, + SHA256: sha, + URL: repoBase + path, + } + } + + withHF := func(preferences string, files ...hfapi.ModelFile) importers.Details { + d := importers.Details{ + URI: "https://huggingface.co/acme/example-ggml", + HuggingFace: &hfapi.ModelDetails{ + ModelID: "acme/example-ggml", + Files: files, + }, + } + if preferences != "" { + d.Preferences = json.RawMessage(preferences) + } + return d + } + + It("falls back to the last ggml file when no preference matches", func() { + imp := &importers.WhisperImporter{} + details := withHF(`{"name":"example"}`, + hfFile("ggml-model-q4_0.bin", "aaa"), + hfFile("ggml-model-q8_0.bin", "ccc"), + hfFile("README.md", ""), + ) + + modelConfig, err := imp.Import(details) + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Files).To(HaveLen(1)) + // Default pref is q5_0; repo has only q4_0 and q8_0 — fallback + // is the last ggml entry, mirroring llama-cpp's behaviour. + Expect(modelConfig.Files[0].Filename).To(Equal("whisper/models/example/ggml-model-q8_0.bin")) + Expect(modelConfig.Files[0].SHA256).To(Equal("ccc")) + }) + + It("ignores non-ggml files in the repo listing", func() { + imp := &importers.WhisperImporter{} + details := withHF(`{"name":"noise","quantizations":"q5_0"}`, + hfFile("README.md", ""), + hfFile("config.json", ""), + hfFile("ggml-model-q5_0.bin", "bbb"), + ) + + modelConfig, err := imp.Import(details) + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Files).To(HaveLen(1)) + Expect(modelConfig.Files[0].Filename).To(Equal("whisper/models/noise/ggml-model-q5_0.bin")) + }) + }) + Context("Importer interface metadata", func() { It("exposes name/modality/autodetect", func() { imp := &importers.WhisperImporter{}