feat(parakeet-cpp): L4 gallery importer for parakeet GGUFs

Add ParakeetCppImporter so parakeet.cpp GGUFs auto-detect on /import-model
and route to the parakeet-cpp backend (it also surfaces in /backends/known,
which drives the import dropdown).

- Match is narrow: a .gguf whose name carries a parakeet architecture token
  (<arch>-<size>-<quant>.gguf, e.g. tdt_ctc-110m-f16.gguf, rnnt-0.6b-q4_k.gguf,
  realtime_eou_120m-v1-q8_0.gguf), a direct URL to one, or
  preferences.backend="parakeet-cpp". It deliberately does NOT claim arbitrary
  llama-style GGUFs, nor the upstream nvidia/parakeet-* NeMo repos (.nemo, not
  runnable here).
- Registered in the ASR batch BEFORE LlamaCPPImporter so its GGUFs aren't
  swallowed by the generic .gguf importer.
- Import nests files under parakeet-cpp/models/<name>/, defaults to the
  smallest quant (q4_k, near-lossless on parakeet) with a size-ladder
  fallback, and honours preferences.quantizations / name / description.

Tested with synthetic HF details (no network): metadata, positive matches
(HF repo, direct URL, preference), narrowness negatives (llama GGUF, NeMo
repo), and import (default quant, override, direct URL). All 9 specs pass;
build/vet/gofmt are clean.

Assisted-by: Claude:claude-opus-4-8 [Claude Code]
This commit is contained in:
Ettore Di Giacinto
2026-05-29 22:12:03 +00:00
parent 37bb5c0b97
commit 7fe2d7b4ac
3 changed files with 287 additions and 0 deletions

View File

@@ -115,6 +115,10 @@ var defaultImporters = []Importer{
&NemoImporter{},
&FasterWhisperImporter{},
&QwenASRImporter{},
// ParakeetCppImporter matches only parakeet GGUFs (<arch>-<size>-<quant>.gguf);
// kept ahead of LlamaCPPImporter so its .gguf bundles aren't claimed by the
// generic GGUF importer.
&ParakeetCppImporter{},
// TTS (Batch 2)
&PiperImporter{},
&BarkImporter{},

View File

@@ -0,0 +1,180 @@
package importers
import (
"encoding/json"
"path/filepath"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
"go.yaml.in/yaml/v2"
)
// Compile-time check that *ParakeetCppImporter implements Importer.
var _ Importer = (*ParakeetCppImporter)(nil)
// ParakeetCppImporter handles parakeet.cpp GGUF weights (the C++/ggml port of
// NVIDIA NeMo Parakeet).
//
// Detection is intentionally narrow. parakeet.cpp publishes its weights as
// "<arch>-<size>-<quant>.gguf" (for example tdt_ctc-110m-f16.gguf,
// rnnt-0.6b-q4_k.gguf or realtime_eou_120m-v1-q8_0.gguf), so only a .gguf
// whose name contains a parakeet architecture token is claimed. Because this
// importer is registered ahead of llama-cpp, that narrowness is what stops it
// from taking arbitrary llama-style GGUFs. The upstream nvidia/parakeet-* NeMo
// repos are deliberately not matched either: they ship .nemo checkpoints, not
// runnable GGUFs.
//
// Setting preferences.backend to "parakeet-cpp" selects this importer
// regardless of the file name.
type ParakeetCppImporter struct{}

// Name returns the importer identifier, "parakeet-cpp".
func (i *ParakeetCppImporter) Name() string {
	return "parakeet-cpp"
}

// Modality returns "asr", the modality this importer is listed under.
func (i *ParakeetCppImporter) Modality() string {
	return "asr"
}

// AutoDetects always returns true: this importer takes part in
// auto-detection rather than requiring an explicit selection.
func (i *ParakeetCppImporter) AutoDetects() bool {
	return true
}
// Match reports whether this importer should handle details.
//
// It returns true when any of the following holds:
//   - preferences.backend is the string "parakeet-cpp";
//   - the basename of details.URI is a parakeet GGUF (direct URL or path);
//   - the Hugging Face metadata lists at least one parakeet GGUF.
//
// Preferences that cannot be serialised or decoded as a JSON object make the
// importer decline (false) rather than fail.
func (i *ParakeetCppImporter) Match(details Details) bool {
	raw, err := details.Preferences.MarshalJSON()
	if err != nil {
		return false
	}
	prefs := map[string]any{}
	if len(raw) != 0 && json.Unmarshal(raw, &prefs) != nil {
		return false
	}

	backend, isString := prefs["backend"].(string)
	switch {
	case isString && backend == "parakeet-cpp":
		// Explicit user choice wins over any name-based detection.
		return true
	case isParakeetGGUF(filepath.Base(details.URI)):
		// The URI itself points at a parakeet GGUF.
		return true
	case details.HuggingFace == nil:
		// No repo listing to inspect, so there is nothing left to match on.
		return false
	}

	// HF repo: a single parakeet GGUF in the listing is enough.
	for _, file := range details.HuggingFace.Files {
		if isParakeetGGUF(filepath.Base(file.Path)) {
			return true
		}
	}
	return false
}
// Import builds the gallery entry (files to download plus the embedded model
// YAML) for a parakeet.cpp model described by details.
//
// Optional preferences, read from details.Preferences as a JSON object:
//   - "name": model name; defaults to the basename of details.URI.
//   - "description": defaults to "Imported from <URI>".
//   - "quantizations": comma-separated quant tokens in priority order;
//     surrounding whitespace is ignored and empty entries are dropped. When
//     absent (or empty after cleaning) the default ladder below is used.
//
// File selection:
//   - details.URI is a direct URL to a parakeet GGUF: that exact file is used.
//   - Hugging Face metadata is available: the preferred quant is picked among
//     the repo's parakeet GGUFs.
//   - Otherwise, or when neither of the above yields a file, no download is
//     registered and the model path is set to the basename of details.URI so
//     the generated YAML can be adjusted by hand.
//
// Downloaded files are nested under parakeet-cpp/models/<name>/.
//
// An error is returned when the preferences cannot be serialised or decoded,
// when a file name cannot be derived from a direct URL, or when the model
// configuration cannot be marshalled to YAML.
func (i *ParakeetCppImporter) Import(details Details) (gallery.ModelConfig, error) {
	preferences, err := details.Preferences.MarshalJSON()
	if err != nil {
		return gallery.ModelConfig{}, err
	}
	preferencesMap := make(map[string]any)
	if len(preferences) > 0 {
		if err := json.Unmarshal(preferences, &preferencesMap); err != nil {
			return gallery.ModelConfig{}, err
		}
	}

	baseName := filepath.Base(details.URI)

	name, ok := preferencesMap["name"].(string)
	if !ok {
		name = baseName
	}
	description, ok := preferencesMap["description"].(string)
	if !ok {
		description = "Imported from " + details.URI
	}

	// parakeet quants are near-lossless even at Q4_K (WER 0.0 vs NeMo on 110m),
	// so default to the smallest and walk up the size ladder. Per the original
	// note here, pickPreferredGGMLFile lets the last file win when no quant
	// matches (mirroring whisper / llama-cpp).
	quants := []string{"q4_k", "q5_k", "q6_k", "q8_0", "f16"}
	if preferred, _ := preferencesMap["quantizations"].(string); preferred != "" {
		// Tolerate "q8_0, f16" and stray commas: an untrimmed or empty token
		// would otherwise be handed to the picker verbatim.
		var requested []string
		for _, q := range strings.Split(preferred, ",") {
			if q = strings.TrimSpace(q); q != "" {
				requested = append(requested, q)
			}
		}
		if len(requested) > 0 {
			quants = requested
		}
	}

	cfg := gallery.ModelConfig{
		Name:        name,
		Description: description,
	}
	modelConfig := config.ModelConfig{
		Name:                name,
		Description:         description,
		Backend:             "parakeet-cpp",
		KnownUsecaseStrings: []string{"transcript"},
	}

	// model is the path written into the YAML; it stays empty until a concrete
	// file has been selected.
	var model string

	uri := downloader.URI(details.URI)
	switch {
	case uri.LooksLikeURL() && isParakeetGGUF(baseName):
		// Direct file URL (e.g. .../resolve/main/tdt_ctc-110m-f16.gguf). The
		// exact file is known, so there is no quant to pick.
		fileName, err := uri.FilenameFromUrl()
		if err != nil {
			return gallery.ModelConfig{}, err
		}
		model = filepath.Join("parakeet-cpp", "models", name, fileName)
		cfg.Files = append(cfg.Files, gallery.File{
			URI:      details.URI,
			Filename: model,
		})
	case details.HuggingFace != nil:
		// HF repo: collect every parakeet GGUF, pick the preferred quant, and
		// nest under parakeet-cpp/models/<name>/ so a multi-quant repo doesn't
		// collide on disk.
		var ggufFiles []hfapi.ModelFile
		for _, f := range details.HuggingFace.Files {
			if isParakeetGGUF(filepath.Base(f.Path)) {
				ggufFiles = append(ggufFiles, f)
			}
		}
		if chosen, ok := pickPreferredGGMLFile(ggufFiles, quants); ok {
			model = filepath.Join("parakeet-cpp", "models", name, filepath.Base(chosen.Path))
			cfg.Files = append(cfg.Files, gallery.File{
				URI:      chosen.URL,
				Filename: model,
				SHA256:   chosen.SHA256,
			})
		}
	}

	// Nothing selected: either a bare URI without HF metadata, or an HF repo
	// with no parakeet GGUF (the importer was forced via preferences). Point
	// at the basename so users can tweak the YAML after import instead of
	// ending up with an empty model path.
	if model == "" {
		model = baseName
	}
	modelConfig.PredictionOptions = schema.PredictionOptions{
		BasicModelRequest: schema.BasicModelRequest{Model: model},
	}

	data, err := yaml.Marshal(modelConfig)
	if err != nil {
		return gallery.ModelConfig{}, err
	}
	cfg.ConfigFile = string(data)
	return cfg, nil
}
// isParakeetGGUF reports whether name is a parakeet.cpp GGUF: a .gguf file
// whose name carries a parakeet architecture token.
//
// The comparison is case-insensitive. The tokens cover the published naming
// (<arch>-<size>-<quant>.gguf) plus a generic "parakeet" fallback.
//
// name is usually the last path segment of a URL, so it may still carry a
// query string or fragment (e.g. "rnnt-0.6b-q4_k.gguf?download=true"). When
// the raw name does not end in .gguf, the part before the first '?' or '#' is
// checked instead; tokens are then only looked for in that part. Names that
// already end in .gguf are evaluated exactly as given.
func isParakeetGGUF(name string) bool {
	lower := strings.ToLower(name)
	if !strings.HasSuffix(lower, ".gguf") {
		// Retry without the URL query/fragment before giving up.
		cut := strings.IndexAny(lower, "?#")
		if cut < 0 {
			return false
		}
		lower = lower[:cut]
		if !strings.HasSuffix(lower, ".gguf") {
			return false
		}
	}
	for _, tok := range []string{"tdt_ctc", "tdt-", "tdt_", "rnnt", "ctc-", "ctc_", "realtime_eou", "parakeet"} {
		if strings.Contains(lower, tok) {
			return true
		}
	}
	return false
}

View File

@@ -0,0 +1,103 @@
package importers_test
import (
"encoding/json"
"fmt"
"github.com/mudler/LocalAI/core/gallery/importers"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// parakeetDetails builds an importers.Details for uri with prefs as the raw
// JSON preferences, carrying a synthetic Hugging Face file list so detection
// and import can be exercised without hitting the network. The HuggingFace
// field is always set, even when no files are supplied.
func parakeetDetails(uri string, prefs string, files ...hfapi.ModelFile) importers.Details {
	details := importers.Details{URI: uri}
	details.Preferences = json.RawMessage(prefs)
	details.HuggingFace = &hfapi.ModelDetails{Files: files}
	return details
}
// Specs for ParakeetCppImporter. Every case feeds the importer synthetic
// Details built by parakeetDetails, so nothing here touches the network. The
// three contexts cover the interface metadata, detection (Match), and the
// generated gallery entry (Import).
var _ = Describe("ParakeetCppImporter", func() {
imp := &importers.ParakeetCppImporter{}
Context("Importer interface metadata", func() {
It("exposes name/modality/autodetect", func() {
Expect(imp.Name()).To(Equal("parakeet-cpp"))
Expect(imp.Modality()).To(Equal("asr"))
Expect(imp.AutoDetects()).To(BeTrue())
})
})
Context("detection (Match)", func() {
// Positive: the repo listing contains a file following the parakeet
// "<arch>-<size>-<quant>.gguf" naming; the README must not get in the way.
It("matches an HF repo shipping a parakeet GGUF", func() {
d := parakeetDetails("huggingface://mudler/parakeet-cpp-gguf", `{}`,
hfapi.ModelFile{Path: "tdt_ctc-110m-f16.gguf"},
hfapi.ModelFile{Path: "README.md"},
)
Expect(imp.Match(d)).To(BeTrue())
})
// Positive: no files are listed, so the match has to come from the
// basename of the URI itself.
It("matches a direct URL to a parakeet GGUF", func() {
d := parakeetDetails("https://huggingface.co/mudler/parakeet-cpp-gguf/resolve/main/rnnt-0.6b-q4_k.gguf", `{}`)
Expect(imp.Match(d)).To(BeTrue())
})
// Positive: the URI carries no parakeet signal at all; only the explicit
// backend preference selects the importer.
It("honours preferences.backend=parakeet-cpp for arbitrary URIs", func() {
d := parakeetDetails("https://example.com/whatever", `{"backend": "parakeet-cpp"}`)
Expect(imp.Match(d)).To(BeTrue())
})
// Negative: a .gguf whose name has no parakeet architecture token must be
// left alone.
It("does NOT claim a generic llama-style GGUF", func() {
d := parakeetDetails("huggingface://someorg/some-llm-gguf", `{}`,
hfapi.ModelFile{Path: "llama-3-8b-instruct-q4_k_m.gguf"},
)
Expect(imp.Match(d)).To(BeFalse())
})
// Negative: the names do contain parakeet tokens ("parakeet", "tdt_ctc"),
// but nothing ends in .gguf, so the importer must not claim the repo.
It("does NOT claim the upstream NeMo repo (.nemo, no GGUF)", func() {
d := parakeetDetails("huggingface://nvidia/parakeet-tdt_ctc-110m", `{}`,
hfapi.ModelFile{Path: "parakeet-tdt_ctc-110m.nemo"},
)
Expect(imp.Match(d)).To(BeFalse())
})
})
Context("import (Import)", func() {
// The q4_k file is deliberately neither first nor last in the listing, so
// selecting it shows the default quant preference at work rather than list
// order. The target path must be nested under parakeet-cpp/models/<name>/.
It("picks the default quant (q4_k) from a multi-quant HF repo", func() {
d := parakeetDetails("huggingface://mudler/parakeet-cpp-gguf", `{"name":"parakeet-110m"}`,
hfapi.ModelFile{Path: "tdt_ctc-110m-f16.gguf", URL: "https://hf/f16", SHA256: "aaa"},
hfapi.ModelFile{Path: "tdt_ctc-110m-q4_k.gguf", URL: "https://hf/q4k", SHA256: "bbb"},
hfapi.ModelFile{Path: "tdt_ctc-110m-q8_0.gguf", URL: "https://hf/q8", SHA256: "ccc"},
)
cfg, err := imp.Import(d)
Expect(err).ToNot(HaveOccurred())
Expect(cfg.ConfigFile).To(ContainSubstring("backend: parakeet-cpp"), fmt.Sprintf("%+v", cfg))
Expect(cfg.ConfigFile).To(ContainSubstring("transcript"))
Expect(cfg.Files).To(HaveLen(1))
Expect(cfg.Files[0].URI).To(Equal("https://hf/q4k"), "default quant should be q4_k")
Expect(cfg.Files[0].Filename).To(ContainSubstring("parakeet-cpp/models/parakeet-110m/tdt_ctc-110m-q4_k.gguf"))
})
// preferences.quantizations replaces the default ladder: q8_0 is requested
// and must be chosen over the f16 file listed before it.
It("honours a preferred quantization override", func() {
d := parakeetDetails("huggingface://mudler/parakeet-cpp-gguf", `{"name":"p","quantizations":"q8_0"}`,
hfapi.ModelFile{Path: "tdt_ctc-110m-f16.gguf", URL: "https://hf/f16"},
hfapi.ModelFile{Path: "tdt_ctc-110m-q8_0.gguf", URL: "https://hf/q8"},
)
cfg, err := imp.Import(d)
Expect(err).ToNot(HaveOccurred())
Expect(cfg.Files).To(HaveLen(1))
Expect(cfg.Files[0].URI).To(Equal("https://hf/q8"))
})
// No HF files are supplied: the single registered file must come from the
// URL itself, stored under the preferred name.
It("uses the exact file for a direct GGUF URL", func() {
d := parakeetDetails("https://huggingface.co/mudler/parakeet-cpp-gguf/resolve/main/ctc-0.6b-q5_k.gguf", `{"name":"ctc"}`)
cfg, err := imp.Import(d)
Expect(err).ToNot(HaveOccurred())
Expect(cfg.Files).To(HaveLen(1))
Expect(cfg.Files[0].Filename).To(ContainSubstring("parakeet-cpp/models/ctc/ctc-0.6b-q5_k.gguf"))
})
})
})