feat(parakeet-cpp): L3 register backend in build/CI/gallery (whisper parity)

Wire the new Go gRPC parakeet-cpp backend (parakeet.cpp ggml port of NVIDIA
NeMo Parakeet ASR) into LocalAI's build/CI/gallery surfaces, matching the
existing ggml whisper Go backend 1:1.

- .github/backend-matrix.yml: add 11 linux entries + 1 darwin entry mirroring
  every whisper build (cpu amd64/arm64, intel sycl f32/f16, vulkan amd64/arm64,
  nvidia cuda-12, nvidia cuda-13, nvidia-l4t-arm64, nvidia-l4t-cuda-13-arm64,
  rocm hipblas, metal-darwin-arm64), all on ./backend/Dockerfile.golang with
  backend: "parakeet-cpp" and -*-parakeet-cpp tag-suffixes.
- scripts/changed-backends.js: explicit inferBackendPath branch resolving
  parakeet-cpp to backend/go/parakeet-cpp/ before the generic golang branch.
- .github/workflows/bump_deps.yaml: track the PARAKEET_VERSION pin in
  backend/go/parakeet-cpp/Makefile (repo mudler/parakeet.cpp, branch master).
- backend/index.yaml: add &parakeetcpp meta + latest/development image entries
  for every matrix tag-suffix.
- Makefile: add backends/parakeet-cpp to .NOTPARALLEL, BACKEND_PARAKEET_CPP
  definition, docker-build target eval, and test-extra-backend-parakeet-cpp-
  transcription target (mirrors test-extra-backend-whisper-transcription).

Assisted-by: Claude:claude-opus-4-8 [Claude Code]
This commit is contained in:
Ettore Di Giacinto
2026-05-29 21:58:30 +00:00
parent 62539dfc85
commit 37bb5c0b97
5 changed files with 324 additions and 1 deletions

View File

@@ -716,6 +716,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-parakeet-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -1556,6 +1569,19 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-parakeet-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -1569,6 +1595,19 @@ include:
backend: "whisper"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-parakeet-cpp'
base-image: "ubuntu:24.04"
ubuntu-version: '2404'
runs-on: 'ubuntu-24.04-arm'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -2944,6 +2983,115 @@ include:
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# parakeet-cpp
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-parakeet-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-parakeet-cpp'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-parakeet-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-parakeet-cpp'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
platform-tag: 'amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-parakeet-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'vulkan'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/arm64'
platform-tag: 'arm64'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-parakeet-cpp'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'false'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-parakeet-cpp'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2204'
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-parakeet-cpp'
base-image: "rocm/dev-ubuntu-24.04:7.2.1"
runs-on: 'ubuntu-latest'
skip-drivers: 'false'
backend: "parakeet-cpp"
dockerfile: "./backend/Dockerfile.golang"
context: "./"
ubuntu-version: '2404'
# acestep-cpp
- build-type: ''
cuda-major-version: ""
@@ -3976,6 +4124,10 @@ includeDarwin:
tag-suffix: "-metal-darwin-arm64-whisper"
build-type: "metal"
lang: "go"
- backend: "parakeet-cpp"
tag-suffix: "-metal-darwin-arm64-parakeet-cpp"
build-type: "metal"
lang: "go"
- backend: "acestep-cpp"
tag-suffix: "-metal-darwin-arm64-acestep-cpp"
build-type: "metal"

View File

@@ -30,6 +30,10 @@ jobs:
variable: "WHISPER_CPP_VERSION"
branch: "master"
file: "backend/go/whisper/Makefile"
- repository: "mudler/parakeet.cpp"
variable: "PARAKEET_VERSION"
branch: "master"
file: "backend/go/parakeet-cpp/Makefile"
- repository: "leejet/stable-diffusion.cpp"
variable: "STABLEDIFFUSION_GGML_VERSION"
branch: "master"

View File

@@ -1,5 +1,5 @@
# Disable parallel execution for backend builds
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio
GOCMD=go
GOTEST=$(GOCMD) test
@@ -991,6 +991,19 @@ test-extra-backend-whisper-transcription: docker-build-whisper
BACKEND_TEST_CAPS=health,load,transcription \
$(MAKE) test-extra-backend
## Audio transcription wrapper for the parakeet-cpp (parakeet.cpp ggml port)
## backend. Mirrors test-extra-backend-whisper-transcription: drives the
## AudioTranscription / AudioTranscriptionStream RPCs against a published
## Parakeet GGUF using the JFK 11s clip from whisper.cpp's CI samples. Not
## part of the default test suite - run explicitly once the pinned model URL
## is reachable.
test-extra-backend-parakeet-cpp-transcription: docker-build-parakeet-cpp
BACKEND_IMAGE=local-ai-backend:parakeet-cpp \
BACKEND_TEST_MODEL_URL=https://huggingface.co/mudler/parakeet-cpp-gguf/resolve/main/tdt_ctc-110m-f16.gguf \
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
BACKEND_TEST_CAPS=health,load,transcription \
$(MAKE) test-extra-backend
## LocalVQE audio transform (joint AEC + noise suppression + dereverb).
## Exercises the audio_transform capability end-to-end: batch transform
## of a real WAV fixture and bidi streaming of synthetic silent frames.
@@ -1149,6 +1162,7 @@ BACKEND_HUGGINGFACE = huggingface|golang|.|false|true
BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
BACKEND_WHISPER = whisper|golang|.|false|true
BACKEND_PARAKEET_CPP = parakeet-cpp|golang|.|false|true
BACKEND_VOXTRAL = voxtral|golang|.|false|true
BACKEND_ACESTEP_CPP = acestep-cpp|golang|.|false|true
BACKEND_QWEN3_TTS_CPP = qwen3-tts-cpp|golang|.|false|true
@@ -1236,6 +1250,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
$(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD)))
$(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_PARAKEET_CPP)))
$(eval $(call generate-docker-build-target,$(BACKEND_VOXTRAL)))
$(eval $(call generate-docker-build-target,$(BACKEND_OPUS)))
$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))

View File

@@ -122,6 +122,35 @@
nvidia-cuda-12: "cuda12-whisper"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisper"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-whisper"
- &parakeetcpp
name: "parakeet-cpp"
alias: "parakeet-cpp"
license: mit
icon: https://avatars.githubusercontent.com/u/95302084
description: |
parakeet.cpp is a C++/ggml port of NVIDIA NeMo Parakeet automatic speech recognition (ASR) models.
It supports the tdt, ctc, rnnt and hybrid decoder families as well as cache-aware streaming transcription,
and runs on CPU, NVIDIA CUDA, AMD ROCm/HIP, Intel SYCL and NVIDIA Jetson (L4T) targets.
urls:
- https://github.com/mudler/parakeet.cpp
tags:
- audio-transcription
- CPU
- GPU
- CUDA
- HIP
capabilities:
default: "cpu-parakeet-cpp"
nvidia: "cuda12-parakeet-cpp"
intel: "intel-sycl-f16-parakeet-cpp"
metal: "metal-parakeet-cpp"
amd: "rocm-parakeet-cpp"
vulkan: "vulkan-parakeet-cpp"
nvidia-l4t: "nvidia-l4t-arm64-parakeet-cpp"
nvidia-cuda-13: "cuda13-parakeet-cpp"
nvidia-cuda-12: "cuda12-parakeet-cpp"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-parakeet-cpp"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-parakeet-cpp"
- &voxtral
name: "voxtral"
alias: "voxtral"
@@ -1928,6 +1957,121 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-whisper"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-whisper
## parakeet-cpp
- !!merge <<: *parakeetcpp
name: "parakeet-cpp-development"
capabilities:
default: "cpu-parakeet-cpp-development"
nvidia: "cuda12-parakeet-cpp-development"
intel: "intel-sycl-f16-parakeet-cpp-development"
metal: "metal-parakeet-cpp-development"
amd: "rocm-parakeet-cpp-development"
vulkan: "vulkan-parakeet-cpp-development"
nvidia-l4t: "nvidia-l4t-arm64-parakeet-cpp-development"
nvidia-cuda-13: "cuda13-parakeet-cpp-development"
nvidia-cuda-12: "cuda12-parakeet-cpp-development"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-parakeet-cpp-development"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-parakeet-cpp-development"
- !!merge <<: *parakeetcpp
name: "nvidia-l4t-arm64-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-arm64-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "nvidia-l4t-arm64-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-parakeet-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-arm64-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "cuda13-nvidia-l4t-arm64-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "cuda13-nvidia-l4t-arm64-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-parakeet-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "cpu-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-cpu-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "cpu-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-parakeet-cpp"
mirrors:
- localai/localai-backends:master-cpu-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "metal-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "metal-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-parakeet-cpp"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "cuda12-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "cuda12-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-parakeet-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "rocm-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "rocm-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-parakeet-cpp"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "intel-sycl-f32-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f32-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "intel-sycl-f32-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-parakeet-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f32-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "intel-sycl-f16-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f16-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "intel-sycl-f16-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-parakeet-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f16-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "vulkan-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-gpu-vulkan-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "vulkan-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-parakeet-cpp"
mirrors:
- localai/localai-backends:master-gpu-vulkan-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "cuda13-parakeet-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-parakeet-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-parakeet-cpp
- !!merge <<: *parakeetcpp
name: "cuda13-parakeet-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-parakeet-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-parakeet-cpp
## stablediffusion-ggml
- !!merge <<: *stablediffusionggml
name: "cpu-stablediffusion-ggml"

View File

@@ -18,6 +18,14 @@ function inferBackendPath(item) {
if (item.dockerfile.endsWith("python")) {
return `backend/python/${item.backend}/`;
}
// parakeet-cpp is a Go backend (Dockerfile.golang) wrapping the parakeet.cpp
// ggml port via purego. It lives in backend/go/parakeet-cpp/; this explicit
// branch (placed before the generic golang one, which would also resolve it
// correctly) documents the mapping and guards against a future
// dockerfile-suffix change.
if (item.backend === "parakeet-cpp") {
return `backend/go/parakeet-cpp/`;
}
if (item.dockerfile.endsWith("golang")) {
return `backend/go/${item.backend}/`;
}