feat(importers): whisper.cpp HF repos pick a quant + nest under whisper/models (#9630)

The WhisperImporter's Import() switch ordered LooksLikeURL ahead of the
HuggingFace branch, so any https://huggingface.co/<owner>/<repo> URI
(e.g. LocalAI-io/whisper-large-v3-it-yodas-only-ggml) was routed down
the URL path: FilenameFromUrl returned the repo slug, the gallery entry
pointed at the HTML repo page, the SHA256 was empty, and the HF file
listing was effectively dead code for HTTPS imports. The HF branch only
fired for huggingface://owner/repo and hf://owner/repo references.
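
Concretely, the broken branch emitted a file entry shaped like this
(illustrative values; field names as in gallery.File):

    gallery.File{
        URI:      "https://huggingface.co/LocalAI-io/whisper-large-v3-it-yodas-only-ggml",
        Filename: "whisper-large-v3-it-yodas-only-ggml", // repo slug, not a model file
        SHA256:   "",
    }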

Gate the URL case on a "ggml-*.bin" basename signal — mirroring how
the llama-cpp importer gates on ".gguf" — so direct file URLs still
take the URL path while HF repo URLs fall through to the HF branch.
There the file listing is actually consulted: every ggml-*.bin entry
is collected and one is picked via the new preferences.quantizations
setting (default q5_0; a comma-separated list sets fallback order).
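
For example (mirroring the new specs below; DiscoverModelConfig is the
entry point the tests exercise):

    modelConfig, err := importers.DiscoverModelConfig(
        "https://huggingface.co/LocalAI-io/whisper-large-v3-it-yodas-only-ggml",
        json.RawMessage(`{"quantizations":"q4_0,q5_0"}`), // tried in order; q4_0 wins here
    )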

Pin the chosen file under whisper/models/<name>/<file> so a single
repo can ship q4_0/q5_0/q8_0 side-by-side without colliding on disk,
matching the llama-cpp/models/<name>/ layout. The fallback when no
preference matches is the last available ggml file, mirroring
llama-cpp's pickPreferredGroup behaviour.
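
Importing that repo once with quantizations=q4_0 and once with the
default therefore yields, per the new tests:

    whisper/models/whisper-large-v3-it-yodas-only-ggml/ggml-model-q4_0.bin
    whisper/models/whisper-large-v3-it-yodas-only-ggml/ggml-model-q5_0.bin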

Tests: replace the previous probe spec with positive assertions
against LocalAI-io/whisper-large-v3-it-yodas-only-ggml (default →
ggml-model-q5_0.bin, quantizations=q4_0 → ggml-model-q4_0.bin) plus
two offline specs that build a fake hfapi.ModelDetails to cover the
fallback rule and non-ggml filtering without touching the network.


Assisted-by: Claude:claude-opus-4-7 [Bash Read Edit WebFetch]

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto, 2026-05-01 12:03:07 +02:00 (committed by GitHub)
parent 0b0078047f
commit 8452068f43
2 changed files with 156 additions and 16 deletions


@@ -9,6 +9,7 @@ import (
     "github.com/mudler/LocalAI/core/gallery"
     "github.com/mudler/LocalAI/core/schema"
     "github.com/mudler/LocalAI/pkg/downloader"
+    hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"

     "go.yaml.in/yaml/v2"
 )
@@ -42,8 +43,7 @@ func (i *WhisperImporter) Match(details Details) bool {
     }
     // Direct URL or path ending in ggml-*.bin
-    base := filepath.Base(details.URI)
-    if strings.HasPrefix(base, "ggml-") && strings.HasSuffix(strings.ToLower(base), ".bin") {
+    if isGGMLFilename(filepath.Base(details.URI)) {
         return true
     }
@@ -76,6 +76,12 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
         description = "Imported from " + details.URI
     }
+    preferredQuants, _ := preferencesMap["quantizations"].(string)
+    quants := []string{"q5_0"}
+    if preferredQuants != "" {
+        quants = strings.Split(preferredQuants, ",")
+    }
     cfg := gallery.ModelConfig{
         Name: name,
         Description: description,
@@ -89,37 +95,43 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
     }
     uri := downloader.URI(details.URI)
+    directGGML := isGGMLFilename(filepath.Base(details.URI))
     switch {
-    case uri.LooksLikeURL():
+    case uri.LooksLikeURL() && directGGML:
+        // Direct file URL (e.g. .../resolve/main/ggml-base.en.bin). We
+        // already know the exact file the user wants — no quant pick.
         fileName, err := uri.FilenameFromUrl()
         if err != nil {
             return gallery.ModelConfig{}, err
         }
+        target := filepath.Join("whisper", "models", name, fileName)
         cfg.Files = append(cfg.Files, gallery.File{
             URI: details.URI,
-            Filename: fileName,
+            Filename: target,
         })
         modelConfig.PredictionOptions = schema.PredictionOptions{
-            BasicModelRequest: schema.BasicModelRequest{Model: fileName},
+            BasicModelRequest: schema.BasicModelRequest{Model: target},
         }
     case details.HuggingFace != nil:
+        // HF repo: collect every ggml-*.bin, pick the preferred quant
+        // (default q5_0), nest under whisper/models/<name>/ so the same
+        // repo can ship multiple quants without colliding on disk.
+        var ggmlFiles []hfapi.ModelFile
         for _, f := range details.HuggingFace.Files {
-            base := filepath.Base(f.Path)
-            if !strings.HasPrefix(base, "ggml-") {
-                continue
-            }
-            if !strings.HasSuffix(strings.ToLower(base), ".bin") {
-                continue
+            if isGGMLFilename(filepath.Base(f.Path)) {
+                ggmlFiles = append(ggmlFiles, f)
             }
+        }
+        if chosen, ok := pickPreferredGGMLFile(ggmlFiles, quants); ok {
+            target := filepath.Join("whisper", "models", name, filepath.Base(chosen.Path))
             cfg.Files = append(cfg.Files, gallery.File{
-                URI: f.URL,
-                Filename: base,
-                SHA256: f.SHA256,
+                URI: chosen.URL,
+                Filename: target,
+                SHA256: chosen.SHA256,
             })
             modelConfig.PredictionOptions = schema.PredictionOptions{
-                BasicModelRequest: schema.BasicModelRequest{Model: base},
+                BasicModelRequest: schema.BasicModelRequest{Model: target},
             }
-            break
         }
     default:
         // Bare URI with no HF metadata (pref-only path). Point the config at
@@ -137,3 +149,30 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
     return cfg, nil
 }
+
+// isGGMLFilename returns true when name follows whisper.cpp's "ggml-*.bin"
+// packaging convention. The .bin check is case-insensitive; the ggml- prefix
+// is exact.
+func isGGMLFilename(name string) bool {
+    return strings.HasPrefix(name, "ggml-") && strings.HasSuffix(strings.ToLower(name), ".bin")
+}
+
+// pickPreferredGGMLFile walks prefs in order and returns the first ggml file
+// whose basename contains any preference token (case-insensitive match on the
+// quant suffix, e.g. "q5_0"). When no preference matches, falls back to the
+// last file — mirroring llama-cpp's pickPreferredGroup behaviour so a missing
+// quant still yields *something* the user can run.
+func pickPreferredGGMLFile(files []hfapi.ModelFile, prefs []string) (hfapi.ModelFile, bool) {
+    if len(files) == 0 {
+        return hfapi.ModelFile{}, false
+    }
+    for _, pref := range prefs {
+        lower := strings.ToLower(pref)
+        for _, f := range files {
+            if strings.Contains(strings.ToLower(filepath.Base(f.Path)), lower) {
+                return f, true
+            }
+        }
+    }
+    return files[len(files)-1], true
+}


@@ -5,6 +5,7 @@ import (
     "fmt"

     "github.com/mudler/LocalAI/core/gallery/importers"
+    hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
 )
@@ -36,6 +37,106 @@ var _ = Describe("WhisperImporter", func() {
         })
     })

+    // Real-world repo that ships *multiple* ggml-*.bin quantizations
+    // (ggml-model-q4_0.bin, ggml-model-q5_0.bin, ggml-model-q8_0.bin).
+    // We assert the importer (a) follows the HF metadata branch — not the
+    // URL branch — when given the repo URL, (b) lays files out under
+    // whisper/models/<name>/ like llama-cpp does, and (c) honours the
+    // quantizations preference, defaulting to q5_0.
+    Context("real-world multi-quant repo: LocalAI-io/whisper-large-v3-it-yodas-only-ggml", func() {
+        const (
+            uri  = "https://huggingface.co/LocalAI-io/whisper-large-v3-it-yodas-only-ggml"
+            name = "whisper-large-v3-it-yodas-only-ggml"
+        )
+
+        It("defaults to q5_0 and nests the file under whisper/models/<name>/", func() {
+            modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{}`))
+            Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
+            Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: whisper"))
+            Expect(modelConfig.ConfigFile).To(ContainSubstring("transcript"))
+            Expect(modelConfig.Files).To(HaveLen(1), fmt.Sprintf("Model config: %+v", modelConfig))
+            expectedPath := "whisper/models/" + name + "/ggml-model-q5_0.bin"
+            Expect(modelConfig.Files[0].Filename).To(Equal(expectedPath))
+            Expect(modelConfig.Files[0].URI).To(Equal(uri + "/resolve/main/ggml-model-q5_0.bin"))
+            Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), "HF metadata should provide a sha256")
+            Expect(modelConfig.ConfigFile).To(ContainSubstring("model: " + expectedPath))
+        })
+
+        It("honours preferences.quantizations=q4_0 to pick ggml-model-q4_0.bin", func() {
+            modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{"quantizations":"q4_0"}`))
+            Expect(err).ToNot(HaveOccurred())
+            Expect(modelConfig.Files).To(HaveLen(1))
+            expectedPath := "whisper/models/" + name + "/ggml-model-q4_0.bin"
+            Expect(modelConfig.Files[0].Filename).To(Equal(expectedPath))
+            Expect(modelConfig.ConfigFile).To(ContainSubstring("model: " + expectedPath))
+        })
+    })
+
+    Context("Import from HuggingFace file listing (offline)", func() {
+        // Mirror of llama-cpp_test.go's offline HF context: build a fake
+        // *hfapi.ModelDetails and assert the emitted gallery entry without
+        // touching the network.
+        const repoBase = "https://huggingface.co/acme/example-ggml/resolve/main/"
+
+        hfFile := func(path, sha string) hfapi.ModelFile {
+            return hfapi.ModelFile{
+                Path:   path,
+                SHA256: sha,
+                URL:    repoBase + path,
+            }
+        }
+
+        withHF := func(preferences string, files ...hfapi.ModelFile) importers.Details {
+            d := importers.Details{
+                URI: "https://huggingface.co/acme/example-ggml",
+                HuggingFace: &hfapi.ModelDetails{
+                    ModelID: "acme/example-ggml",
+                    Files:   files,
+                },
+            }
+            if preferences != "" {
+                d.Preferences = json.RawMessage(preferences)
+            }
+            return d
+        }
+
+        It("falls back to the last ggml file when no preference matches", func() {
+            imp := &importers.WhisperImporter{}
+            details := withHF(`{"name":"example"}`,
+                hfFile("ggml-model-q4_0.bin", "aaa"),
+                hfFile("ggml-model-q8_0.bin", "ccc"),
+                hfFile("README.md", ""),
+            )
+            modelConfig, err := imp.Import(details)
+            Expect(err).ToNot(HaveOccurred())
+            Expect(modelConfig.Files).To(HaveLen(1))
+            // Default pref is q5_0; repo has only q4_0 and q8_0 — fallback
+            // is the last ggml entry, mirroring llama-cpp's behaviour.
+            Expect(modelConfig.Files[0].Filename).To(Equal("whisper/models/example/ggml-model-q8_0.bin"))
+            Expect(modelConfig.Files[0].SHA256).To(Equal("ccc"))
+        })
+
+        It("ignores non-ggml files in the repo listing", func() {
+            imp := &importers.WhisperImporter{}
+            details := withHF(`{"name":"noise","quantizations":"q5_0"}`,
+                hfFile("README.md", ""),
+                hfFile("config.json", ""),
+                hfFile("ggml-model-q5_0.bin", "bbb"),
+            )
+            modelConfig, err := imp.Import(details)
+            Expect(err).ToNot(HaveOccurred())
+            Expect(modelConfig.Files).To(HaveLen(1))
+            Expect(modelConfig.Files[0].Filename).To(Equal("whisper/models/noise/ggml-model-q5_0.bin"))
+        })
+    })
+
     Context("Importer interface metadata", func() {
         It("exposes name/modality/autodetect", func() {
             imp := &importers.WhisperImporter{}