mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-30 03:25:42 -04:00
feat(parakeet-cpp): L4 gallery importer for parakeet GGUFs
Add ParakeetCppImporter so parakeet.cpp GGUFs auto-detect on /import-model and route to the parakeet-cpp backend (it also surfaces in /backends/known, which drives the import dropdown). - Match is narrow: a .gguf whose name carries a parakeet architecture token (<arch>-<size>-<quant>.gguf, e.g. tdt_ctc-110m-f16.gguf, rnnt-0.6b-q4_k.gguf, realtime_eou_120m-v1-q8_0.gguf), a direct URL to one, or preferences.backend="parakeet-cpp". It deliberately does NOT claim arbitrary llama-style GGUFs, nor the upstream nvidia/parakeet-* NeMo repos (.nemo, not runnable here). - Registered in the ASR batch BEFORE LlamaCPPImporter so its GGUFs aren't swallowed by the generic .gguf importer. - Import nests files under parakeet-cpp/models/<name>/, defaults to the smallest quant (q4_k, near-lossless on parakeet) with a size-ladder fallback, and honours preferences.quantizations / name / description. Tested with synthetic HF details (no network): metadata, positive matches (HF repo, direct URL, preference), narrowness negatives (llama GGUF, NeMo repo), and import (default quant, override, direct URL), 9 specs pass, build/vet/gofmt clean. Assisted-by: Claude:claude-opus-4-8 [Claude Code]
This commit is contained in:
@@ -115,6 +115,10 @@ var defaultImporters = []Importer{
|
||||
&NemoImporter{},
|
||||
&FasterWhisperImporter{},
|
||||
&QwenASRImporter{},
|
||||
// ParakeetCppImporter matches only parakeet GGUFs (<arch>-<size>-<quant>.gguf);
|
||||
// kept ahead of LlamaCPPImporter so its .gguf bundles aren't claimed by the
|
||||
// generic GGUF importer.
|
||||
&ParakeetCppImporter{},
|
||||
// TTS (Batch 2)
|
||||
&PiperImporter{},
|
||||
&BarkImporter{},
|
||||
|
||||
180
core/gallery/importers/parakeet-cpp.go
Normal file
180
core/gallery/importers/parakeet-cpp.go
Normal file
@@ -0,0 +1,180 @@
|
||||
package importers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/downloader"
|
||||
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
|
||||
"go.yaml.in/yaml/v2"
|
||||
)
|
||||
|
||||
// Compile-time check that *ParakeetCppImporter satisfies Importer.
var _ Importer = (*ParakeetCppImporter)(nil)
|
||||
|
||||
// ParakeetCppImporter imports parakeet.cpp GGUF weights (parakeet.cpp being
// the C++/ggml port of NVIDIA NeMo Parakeet).
//
// Detection is intentionally narrow. parakeet.cpp publishes its weights as
// "<arch>-<size>-<quant>.gguf", for example:
//
//	tdt_ctc-110m-f16.gguf
//	rnnt-0.6b-q4_k.gguf
//	realtime_eou_120m-v1-q8_0.gguf
//
// so only a .gguf whose name carries a parakeet architecture token is
// claimed. Because this importer is registered ahead of llama-cpp, that
// narrowness is what stops it from swallowing arbitrary llama-style GGUFs.
// The upstream nvidia/parakeet-* NeMo repos are deliberately NOT matched:
// they ship .nemo checkpoints rather than runnable GGUFs.
//
// Setting preferences.backend="parakeet-cpp" selects this importer
// regardless of the file names involved.
type ParakeetCppImporter struct{}

// Name returns the importer identifier, "parakeet-cpp".
func (i *ParakeetCppImporter) Name() string {
	return "parakeet-cpp"
}

// Modality returns "asr", the modality this importer is listed under.
func (i *ParakeetCppImporter) Modality() string {
	return "asr"
}

// AutoDetects always reports true for this importer.
func (i *ParakeetCppImporter) AutoDetects() bool {
	return true
}
|
||||
|
||||
func (i *ParakeetCppImporter) Match(details Details) bool {
|
||||
preferences, err := details.Preferences.MarshalJSON()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
preferencesMap := make(map[string]any)
|
||||
if len(preferences) > 0 {
|
||||
if err := json.Unmarshal(preferences, &preferencesMap); err != nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if b, ok := preferencesMap["backend"].(string); ok && b == "parakeet-cpp" {
|
||||
return true
|
||||
}
|
||||
|
||||
// Direct URL or path to a parakeet GGUF.
|
||||
if isParakeetGGUF(filepath.Base(details.URI)) {
|
||||
return true
|
||||
}
|
||||
|
||||
// HF repo shipping at least one parakeet GGUF.
|
||||
if details.HuggingFace != nil {
|
||||
for _, f := range details.HuggingFace.Files {
|
||||
if isParakeetGGUF(filepath.Base(f.Path)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (i *ParakeetCppImporter) Import(details Details) (gallery.ModelConfig, error) {
|
||||
preferences, err := details.Preferences.MarshalJSON()
|
||||
if err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
preferencesMap := make(map[string]any)
|
||||
if len(preferences) > 0 {
|
||||
if err := json.Unmarshal(preferences, &preferencesMap); err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
}
|
||||
|
||||
name, ok := preferencesMap["name"].(string)
|
||||
if !ok {
|
||||
name = filepath.Base(details.URI)
|
||||
}
|
||||
|
||||
description, ok := preferencesMap["description"].(string)
|
||||
if !ok {
|
||||
description = "Imported from " + details.URI
|
||||
}
|
||||
|
||||
// parakeet quants are near-lossless even at Q4_K (WER 0.0 vs NeMo on 110m),
|
||||
// so default to the smallest, then fall back up the size ladder; the last
|
||||
// file wins if none match (mirrors whisper / llama-cpp).
|
||||
preferredQuants, _ := preferencesMap["quantizations"].(string)
|
||||
quants := []string{"q4_k", "q5_k", "q6_k", "q8_0", "f16"}
|
||||
if preferredQuants != "" {
|
||||
quants = strings.Split(preferredQuants, ",")
|
||||
}
|
||||
|
||||
cfg := gallery.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
}
|
||||
|
||||
modelConfig := config.ModelConfig{
|
||||
Name: name,
|
||||
Description: description,
|
||||
Backend: "parakeet-cpp",
|
||||
KnownUsecaseStrings: []string{"transcript"},
|
||||
}
|
||||
|
||||
uri := downloader.URI(details.URI)
|
||||
directGGUF := isParakeetGGUF(filepath.Base(details.URI))
|
||||
switch {
|
||||
case uri.LooksLikeURL() && directGGUF:
|
||||
// Direct file URL (e.g. .../resolve/main/tdt_ctc-110m-f16.gguf). The
|
||||
// exact file is known — no quant pick.
|
||||
fileName, err := uri.FilenameFromUrl()
|
||||
if err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
target := filepath.Join("parakeet-cpp", "models", name, fileName)
|
||||
cfg.Files = append(cfg.Files, gallery.File{
|
||||
URI: details.URI,
|
||||
Filename: target,
|
||||
})
|
||||
modelConfig.PredictionOptions = schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{Model: target},
|
||||
}
|
||||
case details.HuggingFace != nil:
|
||||
// HF repo: collect every parakeet GGUF, pick the preferred quant, and
|
||||
// nest under parakeet-cpp/models/<name>/ so a multi-quant repo doesn't
|
||||
// collide on disk.
|
||||
var ggufFiles []hfapi.ModelFile
|
||||
for _, f := range details.HuggingFace.Files {
|
||||
if isParakeetGGUF(filepath.Base(f.Path)) {
|
||||
ggufFiles = append(ggufFiles, f)
|
||||
}
|
||||
}
|
||||
if chosen, ok := pickPreferredGGMLFile(ggufFiles, quants); ok {
|
||||
target := filepath.Join("parakeet-cpp", "models", name, filepath.Base(chosen.Path))
|
||||
cfg.Files = append(cfg.Files, gallery.File{
|
||||
URI: chosen.URL,
|
||||
Filename: target,
|
||||
SHA256: chosen.SHA256,
|
||||
})
|
||||
modelConfig.PredictionOptions = schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{Model: target},
|
||||
}
|
||||
}
|
||||
default:
|
||||
// Bare URI with no HF metadata (pref-only path): point at the basename
|
||||
// so users can tweak the YAML after import.
|
||||
modelConfig.PredictionOptions = schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{Model: filepath.Base(details.URI)},
|
||||
}
|
||||
}
|
||||
|
||||
data, err := yaml.Marshal(modelConfig)
|
||||
if err != nil {
|
||||
return gallery.ModelConfig{}, err
|
||||
}
|
||||
cfg.ConfigFile = string(data)
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// isParakeetGGUF reports whether name is a parakeet.cpp GGUF: a .gguf file
// whose name carries a parakeet architecture token. Matching is
// case-insensitive; the tokens cover the published naming
// (<arch>-<size>-<quant>.gguf) plus a generic "parakeet" fallback.
//
// name is normally the basename of a path or URL. When it does not end in
// ".gguf" as given, a trailing URL query string or fragment
// ("x.gguf?download=true", "x.gguf#section") is stripped and the check is
// retried on the remainder, so direct download links are still recognised.
// Names that already end in ".gguf" are evaluated unchanged.
func isParakeetGGUF(name string) bool {
	lower := strings.ToLower(name)
	if !strings.HasSuffix(lower, ".gguf") {
		cut := strings.IndexAny(lower, "?#")
		if cut < 0 {
			return false
		}
		// Only the part before the query/fragment is the file name; the
		// architecture tokens must be found there too.
		lower = lower[:cut]
		if !strings.HasSuffix(lower, ".gguf") {
			return false
		}
	}
	for _, tok := range []string{"tdt_ctc", "tdt-", "tdt_", "rnnt", "ctc-", "ctc_", "realtime_eou", "parakeet"} {
		if strings.Contains(lower, tok) {
			return true
		}
	}
	return false
}
|
||||
103
core/gallery/importers/parakeet-cpp_test.go
Normal file
103
core/gallery/importers/parakeet-cpp_test.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package importers_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/mudler/LocalAI/core/gallery/importers"
|
||||
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// hfWith builds Details carrying a synthetic HF file list so detection can be
|
||||
// exercised without hitting the network.
|
||||
func parakeetDetails(uri string, prefs string, files ...hfapi.ModelFile) importers.Details {
|
||||
return importers.Details{
|
||||
URI: uri,
|
||||
Preferences: json.RawMessage(prefs),
|
||||
HuggingFace: &hfapi.ModelDetails{Files: files},
|
||||
}
|
||||
}
|
||||
|
||||
var _ = Describe("ParakeetCppImporter", func() {
|
||||
imp := &importers.ParakeetCppImporter{}
|
||||
|
||||
Context("Importer interface metadata", func() {
|
||||
It("exposes name/modality/autodetect", func() {
|
||||
Expect(imp.Name()).To(Equal("parakeet-cpp"))
|
||||
Expect(imp.Modality()).To(Equal("asr"))
|
||||
Expect(imp.AutoDetects()).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
Context("detection (Match)", func() {
|
||||
It("matches an HF repo shipping a parakeet GGUF", func() {
|
||||
d := parakeetDetails("huggingface://mudler/parakeet-cpp-gguf", `{}`,
|
||||
hfapi.ModelFile{Path: "tdt_ctc-110m-f16.gguf"},
|
||||
hfapi.ModelFile{Path: "README.md"},
|
||||
)
|
||||
Expect(imp.Match(d)).To(BeTrue())
|
||||
})
|
||||
|
||||
It("matches a direct URL to a parakeet GGUF", func() {
|
||||
d := parakeetDetails("https://huggingface.co/mudler/parakeet-cpp-gguf/resolve/main/rnnt-0.6b-q4_k.gguf", `{}`)
|
||||
Expect(imp.Match(d)).To(BeTrue())
|
||||
})
|
||||
|
||||
It("honours preferences.backend=parakeet-cpp for arbitrary URIs", func() {
|
||||
d := parakeetDetails("https://example.com/whatever", `{"backend": "parakeet-cpp"}`)
|
||||
Expect(imp.Match(d)).To(BeTrue())
|
||||
})
|
||||
|
||||
It("does NOT claim a generic llama-style GGUF", func() {
|
||||
d := parakeetDetails("huggingface://someorg/some-llm-gguf", `{}`,
|
||||
hfapi.ModelFile{Path: "llama-3-8b-instruct-q4_k_m.gguf"},
|
||||
)
|
||||
Expect(imp.Match(d)).To(BeFalse())
|
||||
})
|
||||
|
||||
It("does NOT claim the upstream NeMo repo (.nemo, no GGUF)", func() {
|
||||
d := parakeetDetails("huggingface://nvidia/parakeet-tdt_ctc-110m", `{}`,
|
||||
hfapi.ModelFile{Path: "parakeet-tdt_ctc-110m.nemo"},
|
||||
)
|
||||
Expect(imp.Match(d)).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
Context("import (Import)", func() {
|
||||
It("picks the default quant (q4_k) from a multi-quant HF repo", func() {
|
||||
d := parakeetDetails("huggingface://mudler/parakeet-cpp-gguf", `{"name":"parakeet-110m"}`,
|
||||
hfapi.ModelFile{Path: "tdt_ctc-110m-f16.gguf", URL: "https://hf/f16", SHA256: "aaa"},
|
||||
hfapi.ModelFile{Path: "tdt_ctc-110m-q4_k.gguf", URL: "https://hf/q4k", SHA256: "bbb"},
|
||||
hfapi.ModelFile{Path: "tdt_ctc-110m-q8_0.gguf", URL: "https://hf/q8", SHA256: "ccc"},
|
||||
)
|
||||
cfg, err := imp.Import(d)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(cfg.ConfigFile).To(ContainSubstring("backend: parakeet-cpp"), fmt.Sprintf("%+v", cfg))
|
||||
Expect(cfg.ConfigFile).To(ContainSubstring("transcript"))
|
||||
Expect(cfg.Files).To(HaveLen(1))
|
||||
Expect(cfg.Files[0].URI).To(Equal("https://hf/q4k"), "default quant should be q4_k")
|
||||
Expect(cfg.Files[0].Filename).To(ContainSubstring("parakeet-cpp/models/parakeet-110m/tdt_ctc-110m-q4_k.gguf"))
|
||||
})
|
||||
|
||||
It("honours a preferred quantization override", func() {
|
||||
d := parakeetDetails("huggingface://mudler/parakeet-cpp-gguf", `{"name":"p","quantizations":"q8_0"}`,
|
||||
hfapi.ModelFile{Path: "tdt_ctc-110m-f16.gguf", URL: "https://hf/f16"},
|
||||
hfapi.ModelFile{Path: "tdt_ctc-110m-q8_0.gguf", URL: "https://hf/q8"},
|
||||
)
|
||||
cfg, err := imp.Import(d)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(cfg.Files).To(HaveLen(1))
|
||||
Expect(cfg.Files[0].URI).To(Equal("https://hf/q8"))
|
||||
})
|
||||
|
||||
It("uses the exact file for a direct GGUF URL", func() {
|
||||
d := parakeetDetails("https://huggingface.co/mudler/parakeet-cpp-gguf/resolve/main/ctc-0.6b-q5_k.gguf", `{"name":"ctc"}`)
|
||||
cfg, err := imp.Import(d)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(cfg.Files).To(HaveLen(1))
|
||||
Expect(cfg.Files[0].Filename).To(ContainSubstring("parakeet-cpp/models/ctc/ctc-0.6b-q5_k.gguf"))
|
||||
})
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user