diff --git a/core/gallery/importers/whisper.go b/core/gallery/importers/whisper.go
index 8c46363ed..49007be98 100644
--- a/core/gallery/importers/whisper.go
+++ b/core/gallery/importers/whisper.go
@@ -9,6 +9,7 @@ import (
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/downloader"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 
 	"go.yaml.in/yaml/v2"
 )
@@ -42,8 +43,7 @@ func (i *WhisperImporter) Match(details Details) bool {
 	}
 
 	// Direct URL or path ending in ggml-*.bin
-	base := filepath.Base(details.URI)
-	if strings.HasPrefix(base, "ggml-") && strings.HasSuffix(strings.ToLower(base), ".bin") {
+	if isGGMLFilename(filepath.Base(details.URI)) {
 		return true
 	}
 
@@ -76,6 +76,12 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
 		description = "Imported from " + details.URI
 	}
 
+	preferredQuants, _ := preferencesMap["quantizations"].(string)
+	quants := []string{"q5_0"}
+	if preferredQuants != "" {
+		quants = strings.Split(preferredQuants, ",")
+	}
+
 	cfg := gallery.ModelConfig{
 		Name:        name,
 		Description: description,
@@ -89,37 +95,43 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
 	}
 
 	uri := downloader.URI(details.URI)
+	directGGML := isGGMLFilename(filepath.Base(details.URI))
 	switch {
-	case uri.LooksLikeURL():
+	case uri.LooksLikeURL() && directGGML:
+		// Direct file URL (e.g. .../resolve/main/ggml-base.en.bin). We
+		// already know the exact file the user wants — no quant pick.
 		fileName, err := uri.FilenameFromUrl()
 		if err != nil {
 			return gallery.ModelConfig{}, err
 		}
+		target := filepath.Join("whisper", "models", name, fileName)
 		cfg.Files = append(cfg.Files, gallery.File{
 			URI:      details.URI,
-			Filename: fileName,
+			Filename: target,
 		})
 		modelConfig.PredictionOptions = schema.PredictionOptions{
-			BasicModelRequest: schema.BasicModelRequest{Model: fileName},
+			BasicModelRequest: schema.BasicModelRequest{Model: target},
 		}
 	case details.HuggingFace != nil:
+		// HF repo: collect every ggml-*.bin, pick the preferred quant
+		// (default q5_0), nest under whisper/models/<name>/ so the same
+		// repo can ship multiple quants without colliding on disk.
+		var ggmlFiles []hfapi.ModelFile
 		for _, f := range details.HuggingFace.Files {
-			base := filepath.Base(f.Path)
-			if !strings.HasPrefix(base, "ggml-") {
-				continue
-			}
-			if !strings.HasSuffix(strings.ToLower(base), ".bin") {
-				continue
+			if isGGMLFilename(filepath.Base(f.Path)) {
+				ggmlFiles = append(ggmlFiles, f)
 			}
+		}
+		if chosen, ok := pickPreferredGGMLFile(ggmlFiles, quants); ok {
+			target := filepath.Join("whisper", "models", name, filepath.Base(chosen.Path))
 			cfg.Files = append(cfg.Files, gallery.File{
-				URI:      f.URL,
-				Filename: base,
-				SHA256:   f.SHA256,
+				URI:      chosen.URL,
+				Filename: target,
+				SHA256:   chosen.SHA256,
			})
 			modelConfig.PredictionOptions = schema.PredictionOptions{
-				BasicModelRequest: schema.BasicModelRequest{Model: base},
+				BasicModelRequest: schema.BasicModelRequest{Model: target},
 			}
-			break
 		}
 	default:
 		// Bare URI with no HF metadata (pref-only path). Point the config at
@@ -137,3 +149,30 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
 
 	return cfg, nil
 }
+
+// isGGMLFilename returns true when name follows whisper.cpp's "ggml-*.bin"
+// packaging convention. The .bin check is case-insensitive; the ggml- prefix
+// is exact.
+func isGGMLFilename(name string) bool {
+	return strings.HasPrefix(name, "ggml-") && strings.HasSuffix(strings.ToLower(name), ".bin")
+}
+
+// pickPreferredGGMLFile walks prefs in order and returns the first ggml file
+// whose basename contains any preference token (case-insensitive match on the
+// quant suffix, e.g. "q5_0"). When no preference matches, falls back to the
+// last file — mirroring llama-cpp's pickPreferredGroup behaviour so a missing
+// quant still yields *something* the user can run.
+func pickPreferredGGMLFile(files []hfapi.ModelFile, prefs []string) (hfapi.ModelFile, bool) {
+	if len(files) == 0 {
+		return hfapi.ModelFile{}, false
+	}
+	for _, pref := range prefs {
+		lower := strings.ToLower(pref)
+		for _, f := range files {
+			if strings.Contains(strings.ToLower(filepath.Base(f.Path)), lower) {
+				return f, true
+			}
+		}
+	}
+	return files[len(files)-1], true
+}
diff --git a/core/gallery/importers/whisper_test.go b/core/gallery/importers/whisper_test.go
index 03ee07aa0..48c5d452f 100644
--- a/core/gallery/importers/whisper_test.go
+++ b/core/gallery/importers/whisper_test.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 
 	"github.com/mudler/LocalAI/core/gallery/importers"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
@@ -36,6 +37,106 @@ var _ = Describe("WhisperImporter", func() {
 		})
 	})
 
+	// Real-world repo that ships *multiple* ggml-*.bin quantizations
+	// (ggml-model-q4_0.bin, ggml-model-q5_0.bin, ggml-model-q8_0.bin).
+	// We assert the importer (a) follows the HF metadata branch — not the
+	// URL branch — when given the repo URL, (b) lays files out under
+	// whisper/models/<name>/ like llama-cpp does, and (c) honours the
+	// quantizations preference, defaulting to q5_0.
+	Context("real-world multi-quant repo: LocalAI-io/whisper-large-v3-it-yodas-only-ggml", func() {
+		const (
+			uri  = "https://huggingface.co/LocalAI-io/whisper-large-v3-it-yodas-only-ggml"
+			name = "whisper-large-v3-it-yodas-only-ggml"
+		)
+
+		It("defaults to q5_0 and nests the file under whisper/models/<name>/", func() {
+			modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{}`))
+
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: whisper"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("transcript"))
+
+			Expect(modelConfig.Files).To(HaveLen(1), fmt.Sprintf("Model config: %+v", modelConfig))
+
+			expectedPath := "whisper/models/" + name + "/ggml-model-q5_0.bin"
+			Expect(modelConfig.Files[0].Filename).To(Equal(expectedPath))
+			Expect(modelConfig.Files[0].URI).To(Equal(uri + "/resolve/main/ggml-model-q5_0.bin"))
+			Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), "HF metadata should provide a sha256")
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: " + expectedPath))
+		})
+
+		It("honours preferences.quantizations=q4_0 to pick ggml-model-q4_0.bin", func() {
+			modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{"quantizations":"q4_0"}`))
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Files).To(HaveLen(1))
+
+			expectedPath := "whisper/models/" + name + "/ggml-model-q4_0.bin"
+			Expect(modelConfig.Files[0].Filename).To(Equal(expectedPath))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: " + expectedPath))
+		})
+	})
+
+	Context("Import from HuggingFace file listing (offline)", func() {
+		// Mirror of llama-cpp_test.go's offline HF context: build a fake
+		// *hfapi.ModelDetails and assert the emitted gallery entry without
+		// touching the network.
+		const repoBase = "https://huggingface.co/acme/example-ggml/resolve/main/"
+
+		hfFile := func(path, sha string) hfapi.ModelFile {
+			return hfapi.ModelFile{
+				Path:   path,
+				SHA256: sha,
+				URL:    repoBase + path,
+			}
+		}
+
+		withHF := func(preferences string, files ...hfapi.ModelFile) importers.Details {
+			d := importers.Details{
+				URI: "https://huggingface.co/acme/example-ggml",
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "acme/example-ggml",
+					Files:   files,
+				},
+			}
+			if preferences != "" {
+				d.Preferences = json.RawMessage(preferences)
+			}
+			return d
+		}
+
+		It("falls back to the last ggml file when no preference matches", func() {
+			imp := &importers.WhisperImporter{}
+			details := withHF(`{"name":"example"}`,
+				hfFile("ggml-model-q4_0.bin", "aaa"),
+				hfFile("ggml-model-q8_0.bin", "ccc"),
+				hfFile("README.md", ""),
+			)
+
+			modelConfig, err := imp.Import(details)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Files).To(HaveLen(1))
+			// Default pref is q5_0; repo has only q4_0 and q8_0 — fallback
+			// is the last ggml entry, mirroring llama-cpp's behaviour.
+			Expect(modelConfig.Files[0].Filename).To(Equal("whisper/models/example/ggml-model-q8_0.bin"))
+			Expect(modelConfig.Files[0].SHA256).To(Equal("ccc"))
+		})
+
+		It("ignores non-ggml files in the repo listing", func() {
+			imp := &importers.WhisperImporter{}
+			details := withHF(`{"name":"noise","quantizations":"q5_0"}`,
+				hfFile("README.md", ""),
+				hfFile("config.json", ""),
+				hfFile("ggml-model-q5_0.bin", "bbb"),
+			)
+
+			modelConfig, err := imp.Import(details)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Files).To(HaveLen(1))
+			Expect(modelConfig.Files[0].Filename).To(Equal("whisper/models/noise/ggml-model-q5_0.bin"))
+		})
+	})
+
 	Context("Importer interface metadata", func() {
 		It("exposes name/modality/autodetect", func() {
 			imp := &importers.WhisperImporter{}
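
// A minimal, standalone sketch of the quant-preference selection introduced
// above, for exercising the matching rules outside the importer. ModelFile
// here is a local stand-in for hfapi.ModelFile (only Path is consulted by the
// matcher); the selection logic mirrors pickPreferredGGMLFile from the patch.
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

type ModelFile struct{ Path string }

// pickPreferred: the first preference token (in order) that appears in a
// ggml basename wins; when nothing matches, fall back to the last file.
func pickPreferred(files []ModelFile, prefs []string) (ModelFile, bool) {
	if len(files) == 0 {
		return ModelFile{}, false
	}
	for _, pref := range prefs {
		lower := strings.ToLower(pref)
		for _, f := range files {
			if strings.Contains(strings.ToLower(filepath.Base(f.Path)), lower) {
				return f, true
			}
		}
	}
	return files[len(files)-1], true
}

func main() {
	files := []ModelFile{
		{Path: "ggml-model-q4_0.bin"},
		{Path: "ggml-model-q5_0.bin"},
		{Path: "ggml-model-q8_0.bin"},
	}

	// Default preference list: q5_0 wins.
	f, _ := pickPreferred(files, []string{"q5_0"})
	fmt.Println(f.Path) // ggml-model-q5_0.bin

	// A preferences value like {"quantizations":"q4_0,q4_1"} is split into
	// ordered tokens; the first token with a match decides.
	f, _ = pickPreferred(files, strings.Split("q4_0,q4_1", ","))
	fmt.Println(f.Path) // ggml-model-q4_0.bin

	// No token matches: fall back to the last listed file.
	f, _ = pickPreferred(files, []string{"q6_k"})
	fmt.Println(f.Path) // ggml-model-q8_0.bin
}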