fix: try to add whisperx and faster-whisper for more variants (#9278)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-08 16:57:08 -04:00 · 2026-04-08 21:23:38 +02:00
parent 285f7d4340
commit e00ce981f0
12 changed files with 206 additions and 3 deletions
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -105,6 +105,19 @@ jobs:
            dockerfile: "./backend/Dockerfile.python"
            context: "./"
            ubuntu-version: '2404'
+          - build-type: ''  # CPU-only faster-whisper image: no CUDA version, linux/amd64, plain ubuntu:24.04 base
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-cpu-faster-whisper'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'true'
+            backend: "faster-whisper"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./"
+            ubuntu-version: '2404'
          - build-type: ''
            cuda-major-version: ""
            cuda-minor-version: ""
@@ -965,6 +978,32 @@ jobs:
            backend: "mlx-distributed"
            dockerfile: "./backend/Dockerfile.python"
            context: "./"
+          - build-type: 'l4t'  # whisperx for L4T with CUDA 13.0 (linux/arm64), built on a plain ubuntu:24.04 base
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisperx'  # NOTE(review): the backend/index.yaml hunks in this change add no entry for this tag — confirm it is referenced elsewhere
+            runs-on: 'ubuntu-24.04-arm'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            ubuntu-version: '2404'
+            backend: "whisperx"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./"
+          - build-type: 'l4t'  # faster-whisper for L4T with CUDA 13.0 (linux/arm64), built on a plain ubuntu:24.04 base
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-cuda-13-arm64-faster-whisper'  # NOTE(review): the backend/index.yaml hunks in this change add no entry for this tag — confirm it is referenced elsewhere
+            runs-on: 'ubuntu-24.04-arm'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            ubuntu-version: '2404'
+            backend: "faster-whisper"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "13"
            cuda-minor-version: "0"
@@ -1644,6 +1683,32 @@ jobs:
            dockerfile: "./backend/Dockerfile.python"
            context: "./"
            ubuntu-version: '2204'
+          - build-type: 'l4t'  # whisperx for L4T with CUDA 12.0 (linux/arm64), built on the NVIDIA l4t-jetpack r36.4.0 base
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-whisperx'  # tag used by the nvidia-l4t-arm64-whisperx entries in backend/index.yaml
+            runs-on: 'ubuntu-24.04-arm'
+            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            skip-drivers: 'true'  # presumably because the JetPack base image already ships the drivers — confirm
+            backend: "whisperx"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./"
+            ubuntu-version: '2204'
+          - build-type: 'l4t'  # faster-whisper for L4T with CUDA 12.0 (linux/arm64), built on the NVIDIA l4t-jetpack r36.4.0 base
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-faster-whisper'  # tag used by the nvidia-l4t-arm64-faster-whisper entries in backend/index.yaml
+            runs-on: 'ubuntu-24.04-arm'
+            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            skip-drivers: 'true'  # presumably because the JetPack base image already ships the drivers — confirm
+            backend: "faster-whisper"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./"
+            ubuntu-version: '2204'
          # SYCL additional backends
          - build-type: 'intel'
            cuda-major-version: ""
--- a/Makefile
+++ b/Makefile
@@ -428,6 +428,7 @@ prepare-test-extra: protogen-python
 	$(MAKE) -C backend/python/qwen-asr
 	$(MAKE) -C backend/python/nemo
 	$(MAKE) -C backend/python/voxcpm
+	$(MAKE) -C backend/python/faster-whisper
 	$(MAKE) -C backend/python/whisperx
 	$(MAKE) -C backend/python/ace-step
 	$(MAKE) -C backend/python/trl
@@ -449,6 +450,7 @@ test-extra: prepare-test-extra
 	$(MAKE) -C backend/python/qwen-asr test
 	$(MAKE) -C backend/python/nemo test
 	$(MAKE) -C backend/python/voxcpm test
+	$(MAKE) -C backend/python/faster-whisper test
 	$(MAKE) -C backend/python/whisperx test
 	$(MAKE) -C backend/python/ace-step test
 	$(MAKE) -C backend/python/trl test
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -400,12 +400,15 @@
  license: MIT
  name: "faster-whisper"
  capabilities:
+    default: "cpu-faster-whisper"
    nvidia: "cuda12-faster-whisper"
    intel: "intel-faster-whisper"
    amd: "rocm-faster-whisper"
    metal: "metal-faster-whisper"
    nvidia-cuda-13: "cuda13-faster-whisper"
    nvidia-cuda-12: "cuda12-faster-whisper"
+    nvidia-l4t: "nvidia-l4t-arm64-faster-whisper"
+    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-faster-whisper"
 - &moonshine
  description: |
    Moonshine is a fast, accurate, and efficient speech-to-text transcription model using ONNX Runtime.
@@ -446,6 +449,8 @@
    default: "cpu-whisperx"
    nvidia-cuda-13: "cuda13-whisperx"
    nvidia-cuda-12: "cuda12-whisperx"
+    nvidia-l4t: "nvidia-l4t-arm64-whisperx"
+    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisperx"
 - &kokoro
  icon: https://avatars.githubusercontent.com/u/166769057?v=4
  description: |
@@ -2082,11 +2087,13 @@
 - !!merge <<: *faster-whisper
  name: "faster-whisper-development"
  capabilities:
+    default: "cpu-faster-whisper-development"
    nvidia: "cuda12-faster-whisper-development"
    intel: "intel-faster-whisper-development"
    amd: "rocm-faster-whisper-development"
    metal: "metal-faster-whisper-development"
    nvidia-cuda-13: "cuda13-faster-whisper-development"
+    nvidia-l4t: "nvidia-l4t-arm64-faster-whisper-development"  # NOTE(review): the non-development entry also maps nvidia-l4t-cuda-12 — confirm the omission here is intended
 - !!merge <<: *faster-whisper
  name: "cuda12-faster-whisper-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper"
@@ -2127,6 +2134,36 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-faster-whisper"
  mirrors:
    - localai/localai-backends:master-metal-darwin-arm64-faster-whisper
+- !!merge <<: *faster-whisper
+  name: "cuda12-faster-whisper"  # "latest-" tagged counterpart of cuda12-faster-whisper-development
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-faster-whisper"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-faster-whisper
+- !!merge <<: *faster-whisper
+  name: "rocm-faster-whisper"  # "latest-" tagged ROCm/hipBLAS image; target of the "amd" capability
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-faster-whisper"
+  mirrors:
+    - localai/localai-backends:latest-gpu-rocm-hipblas-faster-whisper
+- !!merge <<: *faster-whisper
+  name: "cpu-faster-whisper"  # "latest-" tagged CPU image; target of the "default" capability of faster-whisper
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-faster-whisper"
+  mirrors:
+    - localai/localai-backends:latest-cpu-faster-whisper
+- !!merge <<: *faster-whisper
+  name: "cpu-faster-whisper-development"  # "master-" tagged CPU image; target of "default" for faster-whisper-development
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-faster-whisper"
+  mirrors:
+    - localai/localai-backends:master-cpu-faster-whisper
+- !!merge <<: *faster-whisper
+  name: "nvidia-l4t-arm64-faster-whisper"  # "latest-" tagged L4T image; serves both the nvidia-l4t and nvidia-l4t-cuda-12 capabilities
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-faster-whisper"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-faster-whisper
+- !!merge <<: *faster-whisper
+  name: "nvidia-l4t-arm64-faster-whisper-development"  # "master-" tagged L4T image; target of nvidia-l4t for faster-whisper-development
+  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-faster-whisper"
+  mirrors:
+    - localai/localai-backends:master-nvidia-l4t-faster-whisper
 ## moonshine
 - !!merge <<: *moonshine
  name: "moonshine-development"
@@ -2185,6 +2222,7 @@
    default: "cpu-whisperx-development"
    nvidia-cuda-13: "cuda13-whisperx-development"
    nvidia-cuda-12: "cuda12-whisperx-development"
+    nvidia-l4t: "nvidia-l4t-arm64-whisperx-development"  # NOTE(review): the non-development entry also maps nvidia-l4t-cuda-12 — confirm the omission here is intended
 - !!merge <<: *whisperx
  name: "cpu-whisperx"
  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisperx"
@@ -2235,6 +2273,16 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisperx"
  mirrors:
    - localai/localai-backends:master-metal-darwin-arm64-whisperx
+- !!merge <<: *whisperx
+  name: "nvidia-l4t-arm64-whisperx"  # "latest-" tagged L4T image; serves both the nvidia-l4t and nvidia-l4t-cuda-12 capabilities
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-whisperx"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-whisperx
+- !!merge <<: *whisperx
+  name: "nvidia-l4t-arm64-whisperx-development"  # "master-" tagged L4T image; target of nvidia-l4t for whisperx-development
+  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-whisperx"
+  mirrors:
+    - localai/localai-backends:master-nvidia-l4t-whisperx
 ## coqui

 - !!merge <<: *coqui
--- a/backend/python/faster-whisper/install.sh
+++ b/backend/python/faster-whisper/install.sh
@@ -16,4 +16,14 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 fi

+if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then  # L4T + CUDA 13 profile (pairs with requirements-l4t13.txt)
+    PYTHON_VERSION="3.12"  # with PYTHON_PATCH and PY_STANDALONE_TAG, presumably selects the Python build used by installRequirements — confirm
+    PYTHON_PATCH="12"
+    PY_STANDALONE_TAG="20251120"
+fi
+
+if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then  # L4T + CUDA 12 profile (pairs with requirements-l4t12.txt)
+    USE_PIP=true  # presumably makes the shared install helpers use pip instead of uv — confirm
+fi
+
 installRequirements
--- a/backend/python/faster-whisper/requirements-l4t12.txt
+++ b/backend/python/faster-whisper/requirements-l4t12.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
+torch
+faster-whisper
--- a/backend/python/faster-whisper/requirements-l4t13.txt
+++ b/backend/python/faster-whisper/requirements-l4t13.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu130
+torch
+faster-whisper
--- a/backend/python/whisperx/install.sh
+++ b/backend/python/whisperx/install.sh
@@ -8,8 +8,21 @@ else
    source $backend_dir/../common/libbackend.sh
 fi

-if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
+if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then  # L4T + CUDA 13 profile (pairs with requirements-l4t13.txt)
+    PYTHON_VERSION="3.12"  # with PYTHON_PATCH and PY_STANDALONE_TAG, presumably selects the Python build used by installRequirements — confirm
+    PYTHON_PATCH="12"
+    PY_STANDALONE_TAG="20251120"
+fi
+
+if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then  # L4T + CUDA 12 profile installs with pip; must be set before the USE_PIP check below
+    USE_PIP=true
+fi
+
+# --index-strategy is a uv-only flag; skip it when using pip
+if [ "x${USE_PIP}" != "xtrue" ]; then
+    if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
+        EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
+    fi
+fi

 installRequirements
--- a/backend/python/whisperx/requirements-l4t12.txt
+++ b/backend/python/whisperx/requirements-l4t12.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
+torch
+whisperx @ git+https://github.com/m-bain/whisperX.git
--- a/backend/python/whisperx/requirements-l4t13.txt
+++ b/backend/python/whisperx/requirements-l4t13.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu130
+torch
+whisperx @ git+https://github.com/m-bain/whisperX.git
--- a/core/gallery/backend_types.go
+++ b/core/gallery/backend_types.go
@@ -71,6 +71,10 @@ func (m *GalleryBackend) IsCompatibleWith(systemState *system.SystemState) bool
 		return true
 	}

+	if systemState.CapabilityFilterDisabled() {
+		return true
+	}
+
 	// Meta backends are compatible if the system capability matches one of the keys
 	if m.IsMeta() {
 		capability := systemState.Capability(m.CapabilitiesMap)
--- a/pkg/system/capabilities.go
+++ b/pkg/system/capabilities.go
@@ -18,7 +18,8 @@ const (
 	Intel  = "intel"

 	// Private constants - only used within this package
-	defaultCapability = "default"
+	defaultCapability  = "default"
+	disableCapability  = "disable"
 	nvidiaL4T         = "nvidia-l4t"
 	darwinX86         = "darwin-x86"
 	metal             = "metal"
@@ -56,6 +57,12 @@ func init() {
 	cuda12DirExists = err == nil
 }

+// CapabilityFilterDisabled reports whether getSystemCapabilities resolves to "disable"
+// (e.g. LOCALAI_FORCE_META_BACKEND_CAPABILITY=disable); callers then treat every backend as compatible.
+func (s *SystemState) CapabilityFilterDisabled() bool {
+	return s.getSystemCapabilities() == disableCapability
+}
+
 func (s *SystemState) Capability(capMap map[string]string) string {
 	reportedCapability := s.getSystemCapabilities()

@@ -196,6 +203,10 @@ func (s *SystemState) DetectedCapability() string {
 // with the current system capability. This function uses getSystemCapabilities to ensure
 // consistency with capability detection (including VRAM checks, environment overrides, etc.).
 func (s *SystemState) IsBackendCompatible(name, uri string) bool {
+	if s.CapabilityFilterDisabled() {
+		return true
+	}
+
 	combined := strings.ToLower(name + " " + uri)
 	capability := s.getSystemCapabilities()

--- a/pkg/system/capabilities_test.go
+++ b/pkg/system/capabilities_test.go
@@ -127,3 +127,41 @@ var _ = Describe("getSystemCapabilities", func() {
 		}),
 	)
 })
+
+var _ = Describe("CapabilityFilterDisabled", func() {
+	var savedEnv string
+	// Snapshot the capability override so each spec can change it freely.
+	BeforeEach(func() {
+		savedEnv = os.Getenv(capabilityEnv)
+	})
+	// Put the override back; an empty snapshot is treated as "was not set".
+	AfterEach(func() {
+		if savedEnv == "" {
+			os.Unsetenv(capabilityEnv)
+			return
+		}
+		os.Setenv(capabilityEnv, savedEnv)
+	})
+
+	It("returns true when capability is set to disable", func() {
+		os.Setenv(capabilityEnv, "disable")
+		state := &SystemState{}
+		Expect(state.CapabilityFilterDisabled()).To(BeTrue())
+	})
+
+	It("returns false when capability is not set to disable", func() {
+		os.Setenv(capabilityEnv, "nvidia")
+		state := &SystemState{}
+		Expect(state.CapabilityFilterDisabled()).To(BeFalse())
+	})
+
+	It("makes IsBackendCompatible return true for all backends when disabled", func() {
+		os.Setenv(capabilityEnv, "disable")
+		state := &SystemState{}
+		Expect(state.IsBackendCompatible("cuda12-whisperx", "quay.io/nvidia-cuda-12")).To(BeTrue())
+		Expect(state.IsBackendCompatible("rocm-whisperx", "quay.io/rocm")).To(BeTrue())
+		Expect(state.IsBackendCompatible("metal-whisperx", "quay.io/metal-darwin")).To(BeTrue())
+		Expect(state.IsBackendCompatible("intel-whisperx", "quay.io/intel-sycl")).To(BeTrue())
+		Expect(state.IsBackendCompatible("cpu-whisperx", "quay.io/cpu")).To(BeTrue())
+	})
+})