diff --git a/core/gallery/importers/whisper.go b/core/gallery/importers/whisper.go
index 8c46363ed..49007be98 100644
--- a/core/gallery/importers/whisper.go
+++ b/core/gallery/importers/whisper.go
@@ -9,6 +9,7 @@ import (
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/downloader"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 
 	"go.yaml.in/yaml/v2"
 )
@@ -42,8 +43,7 @@ func (i *WhisperImporter) Match(details Details) bool {
 	}
 
 	// Direct URL or path ending in ggml-*.bin
-	base := filepath.Base(details.URI)
-	if strings.HasPrefix(base, "ggml-") && strings.HasSuffix(strings.ToLower(base), ".bin") {
+	if isGGMLFilename(filepath.Base(details.URI)) {
 		return true
 	}
 
@@ -76,6 +76,12 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
 		description = "Imported from " + details.URI
 	}
 
+	preferredQuants, _ := preferencesMap["quantizations"].(string)
+	quants := []string{"q5_0"}
+	if preferredQuants != "" {
+		quants = strings.Split(preferredQuants, ",")
+	}
+
 	cfg := gallery.ModelConfig{
 		Name:        name,
 		Description: description,
@@ -89,37 +95,43 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
 	}
 
 	uri := downloader.URI(details.URI)
+	directGGML := isGGMLFilename(filepath.Base(details.URI))
 	switch {
-	case uri.LooksLikeURL():
+	case uri.LooksLikeURL() && directGGML:
+		// Direct file URL (e.g. .../resolve/main/ggml-base.en.bin). We
+		// already know the exact file the user wants — no quant pick.
 		fileName, err := uri.FilenameFromUrl()
 		if err != nil {
 			return gallery.ModelConfig{}, err
 		}
+		target := filepath.Join("whisper", "models", name, fileName)
 		cfg.Files = append(cfg.Files, gallery.File{
 			URI:      details.URI,
-			Filename: fileName,
+			Filename: target,
 		})
 		modelConfig.PredictionOptions = schema.PredictionOptions{
-			BasicModelRequest: schema.BasicModelRequest{Model: fileName},
+			BasicModelRequest: schema.BasicModelRequest{Model: target},
 		}
 	case details.HuggingFace != nil:
+		// HF repo: collect every ggml-*.bin, pick the preferred quant
+		// (default q5_0), nest under whisper/models/<name>/ so the same
+		// repo can ship multiple quants without colliding on disk.
+		var ggmlFiles []hfapi.ModelFile
 		for _, f := range details.HuggingFace.Files {
-			base := filepath.Base(f.Path)
-			if !strings.HasPrefix(base, "ggml-") {
-				continue
-			}
-			if !strings.HasSuffix(strings.ToLower(base), ".bin") {
-				continue
+			if isGGMLFilename(filepath.Base(f.Path)) {
+				ggmlFiles = append(ggmlFiles, f)
 			}
+		}
+		if chosen, ok := pickPreferredGGMLFile(ggmlFiles, quants); ok {
+			target := filepath.Join("whisper", "models", name, filepath.Base(chosen.Path))
 			cfg.Files = append(cfg.Files, gallery.File{
-				URI:      f.URL,
-				Filename: base,
-				SHA256:   f.SHA256,
+				URI:      chosen.URL,
+				Filename: target,
+				SHA256:   chosen.SHA256,
			})
 			modelConfig.PredictionOptions = schema.PredictionOptions{
-				BasicModelRequest: schema.BasicModelRequest{Model: base},
+				BasicModelRequest: schema.BasicModelRequest{Model: target},
 			}
-			break
 		}
 	default:
 		// Bare URI with no HF metadata (pref-only path). Point the config at
@@ -137,3 +149,30 @@ func (i *WhisperImporter) Import(details Details) (gallery.ModelConfig, error) {
 
 	return cfg, nil
 }
+
+// isGGMLFilename returns true when name follows whisper.cpp's "ggml-*.bin"
+// packaging convention. The .bin check is case-insensitive; the ggml- prefix
+// is exact.
+func isGGMLFilename(name string) bool {
+	return strings.HasPrefix(name, "ggml-") && strings.HasSuffix(strings.ToLower(name), ".bin")
+}
+
+// pickPreferredGGMLFile walks prefs in order and returns the first ggml file
+// whose basename contains any preference token (case-insensitive match on the
+// quant suffix, e.g. "q5_0"). When no preference matches, falls back to the
+// last file — mirroring llama-cpp's pickPreferredGroup behaviour so a missing
+// quant still yields *something* the user can run.
+func pickPreferredGGMLFile(files []hfapi.ModelFile, prefs []string) (hfapi.ModelFile, bool) {
+	if len(files) == 0 {
+		return hfapi.ModelFile{}, false
+	}
+	for _, pref := range prefs {
+		lower := strings.ToLower(pref)
+		for _, f := range files {
+			if strings.Contains(strings.ToLower(filepath.Base(f.Path)), lower) {
+				return f, true
+			}
+		}
+	}
+	return files[len(files)-1], true
+}
diff --git a/core/gallery/importers/whisper_test.go b/core/gallery/importers/whisper_test.go
index 03ee07aa0..48c5d452f 100644
--- a/core/gallery/importers/whisper_test.go
+++ b/core/gallery/importers/whisper_test.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 
 	"github.com/mudler/LocalAI/core/gallery/importers"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
@@ -36,6 +37,106 @@ var _ = Describe("WhisperImporter", func() {
 		})
 	})
 
+	// Real-world repo that ships *multiple* ggml-*.bin quantizations
+	// (ggml-model-q4_0.bin, ggml-model-q5_0.bin, ggml-model-q8_0.bin).
+	// We assert the importer (a) follows the HF metadata branch — not the
+	// URL branch — when given the repo URL, (b) lays files out under
+	// whisper/models/<name>/ like llama-cpp does, and (c) honours the
+	// quantizations preference, defaulting to q5_0.
+	Context("real-world multi-quant repo: LocalAI-io/whisper-large-v3-it-yodas-only-ggml", func() {
+		const (
+			uri  = "https://huggingface.co/LocalAI-io/whisper-large-v3-it-yodas-only-ggml"
+			name = "whisper-large-v3-it-yodas-only-ggml"
+		)
+
+		It("defaults to q5_0 and nests the file under whisper/models/<name>/", func() {
+			modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{}`))
+
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: whisper"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("transcript"))
+
+			Expect(modelConfig.Files).To(HaveLen(1), fmt.Sprintf("Model config: %+v", modelConfig))
+
+			expectedPath := "whisper/models/" + name + "/ggml-model-q5_0.bin"
+			Expect(modelConfig.Files[0].Filename).To(Equal(expectedPath))
+			Expect(modelConfig.Files[0].URI).To(Equal(uri + "/resolve/main/ggml-model-q5_0.bin"))
+			Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), "HF metadata should provide a sha256")
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: " + expectedPath))
+		})
+
+		It("honours preferences.quantizations=q4_0 to pick ggml-model-q4_0.bin", func() {
+			modelConfig, err := importers.DiscoverModelConfig(uri, json.RawMessage(`{"quantizations":"q4_0"}`))
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Files).To(HaveLen(1))
+
+			expectedPath := "whisper/models/" + name + "/ggml-model-q4_0.bin"
+			Expect(modelConfig.Files[0].Filename).To(Equal(expectedPath))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: " + expectedPath))
+		})
+	})
+
+	Context("Import from HuggingFace file listing (offline)", func() {
+		// Mirror of llama-cpp_test.go's offline HF context: build a fake
+		// *hfapi.ModelDetails and assert the emitted gallery entry without
+		// touching the network.
+		const repoBase = "https://huggingface.co/acme/example-ggml/resolve/main/"
+
+		hfFile := func(path, sha string) hfapi.ModelFile {
+			return hfapi.ModelFile{
+				Path:   path,
+				SHA256: sha,
+				URL:    repoBase + path,
+			}
+		}
+
+		withHF := func(preferences string, files ...hfapi.ModelFile) importers.Details {
+			d := importers.Details{
+				URI: "https://huggingface.co/acme/example-ggml",
+				HuggingFace: &hfapi.ModelDetails{
+					ModelID: "acme/example-ggml",
+					Files:   files,
+				},
+			}
+			if preferences != "" {
+				d.Preferences = json.RawMessage(preferences)
+			}
+			return d
+		}
+
+		It("falls back to the last ggml file when no preference matches", func() {
+			imp := &importers.WhisperImporter{}
+			details := withHF(`{"name":"example"}`,
+				hfFile("ggml-model-q4_0.bin", "aaa"),
+				hfFile("ggml-model-q8_0.bin", "ccc"),
+				hfFile("README.md", ""),
+			)
+
+			modelConfig, err := imp.Import(details)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Files).To(HaveLen(1))
+			// Default pref is q5_0; repo has only q4_0 and q8_0 — fallback
+			// is the last ggml entry, mirroring llama-cpp's behaviour.
+			Expect(modelConfig.Files[0].Filename).To(Equal("whisper/models/example/ggml-model-q8_0.bin"))
+			Expect(modelConfig.Files[0].SHA256).To(Equal("ccc"))
+		})
+
+		It("ignores non-ggml files in the repo listing", func() {
+			imp := &importers.WhisperImporter{}
+			details := withHF(`{"name":"noise","quantizations":"q5_0"}`,
+				hfFile("README.md", ""),
+				hfFile("config.json", ""),
+				hfFile("ggml-model-q5_0.bin", "bbb"),
+			)
+
+			modelConfig, err := imp.Import(details)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Files).To(HaveLen(1))
+			Expect(modelConfig.Files[0].Filename).To(Equal("whisper/models/noise/ggml-model-q5_0.bin"))
+		})
+	})
+
 	Context("Importer interface metadata", func() {
 		It("exposes name/modality/autodetect", func() {
 			imp := &importers.WhisperImporter{}
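
// A minimal, standalone sketch of the quant-preference selection introduced
// above, for exercising the matching rules outside the importer. ModelFile
// here is a local stand-in for hfapi.ModelFile (only Path is consulted by the
// matcher); the selection logic mirrors pickPreferredGGMLFile from the patch.
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

type ModelFile struct{ Path string }

// pickPreferred: the first preference token (in order) that appears in a
// ggml basename wins; when nothing matches, fall back to the last file.
func pickPreferred(files []ModelFile, prefs []string) (ModelFile, bool) {
	if len(files) == 0 {
		return ModelFile{}, false
	}
	for _, pref := range prefs {
		lower := strings.ToLower(pref)
		for _, f := range files {
			if strings.Contains(strings.ToLower(filepath.Base(f.Path)), lower) {
				return f, true
			}
		}
	}
	return files[len(files)-1], true
}

func main() {
	files := []ModelFile{
		{Path: "ggml-model-q4_0.bin"},
		{Path: "ggml-model-q5_0.bin"},
		{Path: "ggml-model-q8_0.bin"},
	}

	// Default preference list: q5_0 wins.
	f, _ := pickPreferred(files, []string{"q5_0"})
	fmt.Println(f.Path) // ggml-model-q5_0.bin

	// A preferences value like {"quantizations":"q4_0,q4_1"} is split into
	// ordered tokens; the first token with a match decides.
	f, _ = pickPreferred(files, strings.Split("q4_0,q4_1", ","))
	fmt.Println(f.Path) // ggml-model-q4_0.bin

	// No token matches: fall back to the last listed file.
	f, _ = pickPreferred(files, []string{"q6_k"})
	fmt.Println(f.Path) // ggml-model-q8_0.bin
}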