fix: try to add whisperx and faster-whisper for more variants (#9278)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-04-08 21:23:38 +02:00
committed by GitHub
parent 285f7d4340
commit e00ce981f0
12 changed files with 206 additions and 3 deletions

View File

@@ -105,6 +105,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
@@ -965,6 +978,32 @@ jobs:
backend: "mlx-distributed"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisperx'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "whisperx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-faster-whisper'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -1644,6 +1683,32 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-whisperx'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "whisperx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-faster-whisper'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
# SYCL additional backends
- build-type: 'intel'
cuda-major-version: ""

View File

@@ -428,6 +428,7 @@ prepare-test-extra: protogen-python
$(MAKE) -C backend/python/qwen-asr
$(MAKE) -C backend/python/nemo
$(MAKE) -C backend/python/voxcpm
$(MAKE) -C backend/python/faster-whisper
$(MAKE) -C backend/python/whisperx
$(MAKE) -C backend/python/ace-step
$(MAKE) -C backend/python/trl
@@ -449,6 +450,7 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/qwen-asr test
$(MAKE) -C backend/python/nemo test
$(MAKE) -C backend/python/voxcpm test
$(MAKE) -C backend/python/faster-whisper test
$(MAKE) -C backend/python/whisperx test
$(MAKE) -C backend/python/ace-step test
$(MAKE) -C backend/python/trl test

View File

@@ -400,12 +400,15 @@
license: MIT
name: "faster-whisper"
capabilities:
default: "cpu-faster-whisper"
nvidia: "cuda12-faster-whisper"
intel: "intel-faster-whisper"
amd: "rocm-faster-whisper"
metal: "metal-faster-whisper"
nvidia-cuda-13: "cuda13-faster-whisper"
nvidia-cuda-12: "cuda12-faster-whisper"
nvidia-l4t: "nvidia-l4t-arm64-faster-whisper"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-faster-whisper"
- &moonshine
description: |
Moonshine is a fast, accurate, and efficient speech-to-text transcription model using ONNX Runtime.
@@ -446,6 +449,8 @@
default: "cpu-whisperx"
nvidia-cuda-13: "cuda13-whisperx"
nvidia-cuda-12: "cuda12-whisperx"
nvidia-l4t: "nvidia-l4t-arm64-whisperx"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisperx"
- &kokoro
icon: https://avatars.githubusercontent.com/u/166769057?v=4
description: |
@@ -2082,11 +2087,13 @@
- !!merge <<: *faster-whisper
name: "faster-whisper-development"
capabilities:
default: "cpu-faster-whisper-development"
nvidia: "cuda12-faster-whisper-development"
intel: "intel-faster-whisper-development"
amd: "rocm-faster-whisper-development"
metal: "metal-faster-whisper-development"
nvidia-cuda-13: "cuda13-faster-whisper-development"
nvidia-l4t: "nvidia-l4t-arm64-faster-whisper-development"
- !!merge <<: *faster-whisper
name: "cuda12-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper"
@@ -2127,6 +2134,36 @@
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-faster-whisper"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-faster-whisper
- !!merge <<: *faster-whisper
name: "cuda12-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-faster-whisper"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-faster-whisper
- !!merge <<: *faster-whisper
name: "rocm-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-faster-whisper"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-faster-whisper
- !!merge <<: *faster-whisper
name: "cpu-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-faster-whisper"
mirrors:
- localai/localai-backends:latest-cpu-faster-whisper
- !!merge <<: *faster-whisper
name: "cpu-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-faster-whisper"
mirrors:
- localai/localai-backends:master-cpu-faster-whisper
- !!merge <<: *faster-whisper
name: "nvidia-l4t-arm64-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-faster-whisper"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-faster-whisper
- !!merge <<: *faster-whisper
name: "nvidia-l4t-arm64-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-faster-whisper"
mirrors:
- localai/localai-backends:master-nvidia-l4t-faster-whisper
## moonshine
- !!merge <<: *moonshine
name: "moonshine-development"
@@ -2185,6 +2222,7 @@
default: "cpu-whisperx-development"
nvidia-cuda-13: "cuda13-whisperx-development"
nvidia-cuda-12: "cuda12-whisperx-development"
nvidia-l4t: "nvidia-l4t-arm64-whisperx-development"
- !!merge <<: *whisperx
name: "cpu-whisperx"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisperx"
@@ -2235,6 +2273,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisperx"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-whisperx
- !!merge <<: *whisperx
name: "nvidia-l4t-arm64-whisperx"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-whisperx"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-whisperx
- !!merge <<: *whisperx
name: "nvidia-l4t-arm64-whisperx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-whisperx"
mirrors:
- localai/localai-backends:master-nvidia-l4t-whisperx
## coqui
- !!merge <<: *coqui

View File

@@ -16,4 +16,14 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi
if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
PYTHON_VERSION="3.12"
PYTHON_PATCH="12"
PY_STANDALONE_TAG="20251120"
fi
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
USE_PIP=true
fi
installRequirements

View File

@@ -0,0 +1,3 @@
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
torch
faster-whisper

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch
faster-whisper

View File

@@ -8,8 +8,21 @@ else
source $backend_dir/../common/libbackend.sh
fi
if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
PYTHON_VERSION="3.12"
PYTHON_PATCH="12"
PY_STANDALONE_TAG="20251120"
fi
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
USE_PIP=true
fi
# --index-strategy is a uv-only flag; skip it when using pip
if [ "x${USE_PIP}" != "xtrue" ]; then
if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
fi
fi
installRequirements

View File

@@ -0,0 +1,3 @@
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
torch
whisperx @ git+https://github.com/m-bain/whisperX.git

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch
whisperx @ git+https://github.com/m-bain/whisperX.git

View File

@@ -71,6 +71,10 @@ func (m *GalleryBackend) IsCompatibleWith(systemState *system.SystemState) bool
return true
}
if systemState.CapabilityFilterDisabled() {
return true
}
// Meta backends are compatible if the system capability matches one of the keys
if m.IsMeta() {
capability := systemState.Capability(m.CapabilitiesMap)

View File

@@ -18,7 +18,8 @@ const (
Intel = "intel"
// Private constants - only used within this package
defaultCapability = "default"
defaultCapability = "default"
disableCapability = "disable"
nvidiaL4T = "nvidia-l4t"
darwinX86 = "darwin-x86"
metal = "metal"
@@ -56,6 +57,12 @@ func init() {
cuda12DirExists = err == nil
}
// CapabilityFilterDisabled returns true when capability-based backend filtering
// is disabled via LOCALAI_FORCE_META_BACKEND_CAPABILITY=disable.
func (s *SystemState) CapabilityFilterDisabled() bool {
return s.getSystemCapabilities() == disableCapability
}
func (s *SystemState) Capability(capMap map[string]string) string {
reportedCapability := s.getSystemCapabilities()
@@ -196,6 +203,10 @@ func (s *SystemState) DetectedCapability() string {
// with the current system capability. This function uses getSystemCapabilities to ensure
// consistency with capability detection (including VRAM checks, environment overrides, etc.).
func (s *SystemState) IsBackendCompatible(name, uri string) bool {
if s.CapabilityFilterDisabled() {
return true
}
combined := strings.ToLower(name + " " + uri)
capability := s.getSystemCapabilities()

View File

@@ -127,3 +127,41 @@ var _ = Describe("getSystemCapabilities", func() {
}),
)
})
var _ = Describe("CapabilityFilterDisabled", func() {
var origEnv string
BeforeEach(func() {
origEnv = os.Getenv(capabilityEnv)
})
AfterEach(func() {
if origEnv != "" {
os.Setenv(capabilityEnv, origEnv)
} else {
os.Unsetenv(capabilityEnv)
}
})
It("returns true when capability is set to disable", func() {
os.Setenv(capabilityEnv, "disable")
s := &SystemState{}
Expect(s.CapabilityFilterDisabled()).To(BeTrue())
})
It("returns false when capability is not set to disable", func() {
os.Setenv(capabilityEnv, "nvidia")
s := &SystemState{}
Expect(s.CapabilityFilterDisabled()).To(BeFalse())
})
It("makes IsBackendCompatible return true for all backends when disabled", func() {
os.Setenv(capabilityEnv, "disable")
s := &SystemState{}
Expect(s.IsBackendCompatible("cuda12-whisperx", "quay.io/nvidia-cuda-12")).To(BeTrue())
Expect(s.IsBackendCompatible("rocm-whisperx", "quay.io/rocm")).To(BeTrue())
Expect(s.IsBackendCompatible("metal-whisperx", "quay.io/metal-darwin")).To(BeTrue())
Expect(s.IsBackendCompatible("intel-whisperx", "quay.io/intel-sycl")).To(BeTrue())
Expect(s.IsBackendCompatible("cpu-whisperx", "quay.io/cpu")).To(BeTrue())
})
})