From 8616397d59d701842ce42be5c7cedb8eac851dc3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 12 Feb 2026 18:01:42 +0100 Subject: [PATCH] chore(model gallery): add nemo-asr (#8533) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 872496170..bf47fcb8f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,6 +1,30 @@ --- +- name: nemo-parakeet-tdt-0.6b + license: apache-2.0 + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + description: | + NVIDIA NeMo Parakeet TDT 0.6B v3 is an automatic speech recognition (ASR) model from NVIDIA's NeMo toolkit. Parakeet models are state-of-the-art ASR models trained on large-scale audio data; the v3 release is multilingual, extending the English-only v2 model to 25 European languages. + urls: + - https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3 + - https://github.com/NVIDIA/NeMo + tags: + - stt + - speech-to-text + - asr + - nvidia + - nemo + - parakeet + - cpu + - gpu + overrides: + backend: nemo + known_usecases: + - transcript + parameters: + model: nvidia/parakeet-tdt-0.6b-v3 - name: voxtral-mini-4b-realtime license: apache-2.0 + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" description: | Voxtral Mini 4B Realtime is a speech-to-text model from Mistral AI. It is a 4B parameter model optimized for fast, accurate audio transcription with low latency, making it ideal for real-time applications. The model uses the Voxtral architecture for efficient audio processing. urls: @@ -13,11 +37,10 @@ - cpu - metal - mistral - known_usecases: - - sound_generation - - tts overrides: backend: voxtral + known_usecases: + - transcript parameters: model: voxtral-model files: