diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 0ec9bcf58..aa16c6494 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -105,6 +105,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-cpu-faster-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'true' + backend: "faster-whisper" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -965,6 +978,32 @@ jobs: backend: "mlx-distributed" dockerfile: "./backend/Dockerfile.python" context: "./" + - build-type: 'l4t' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisperx' + runs-on: 'ubuntu-24.04-arm' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + ubuntu-version: '2404' + backend: "whisperx" + dockerfile: "./backend/Dockerfile.python" + context: "./" + - build-type: 'l4t' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-faster-whisper' + runs-on: 'ubuntu-24.04-arm' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + ubuntu-version: '2404' + backend: "faster-whisper" + dockerfile: "./backend/Dockerfile.python" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -1644,6 +1683,32 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2204' + - build-type: 'l4t' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-whisperx' + runs-on: 'ubuntu-24.04-arm' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + skip-drivers: 'true' + backend: "whisperx" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2204' + - build-type: 'l4t' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-faster-whisper' + runs-on: 'ubuntu-24.04-arm' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + skip-drivers: 'true' + backend: "faster-whisper" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2204' # SYCL additional backends - build-type: 'intel' cuda-major-version: "" diff --git a/Makefile b/Makefile index 3b428473f..eeffc8e2b 100644 --- a/Makefile +++ b/Makefile @@ -428,6 +428,7 @@ prepare-test-extra: protogen-python $(MAKE) -C backend/python/qwen-asr $(MAKE) -C backend/python/nemo $(MAKE) -C backend/python/voxcpm + $(MAKE) -C backend/python/faster-whisper $(MAKE) -C backend/python/whisperx $(MAKE) -C backend/python/ace-step $(MAKE) -C backend/python/trl @@ -449,6 +450,7 @@ test-extra: prepare-test-extra $(MAKE) -C backend/python/qwen-asr test $(MAKE) -C backend/python/nemo test $(MAKE) -C backend/python/voxcpm test + $(MAKE) -C backend/python/faster-whisper test $(MAKE) -C backend/python/whisperx test $(MAKE) -C backend/python/ace-step test $(MAKE) -C backend/python/trl test diff --git a/backend/index.yaml b/backend/index.yaml index a5b9b90fb..c424f5e42 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -400,12 +400,15 @@ license: MIT name: "faster-whisper" capabilities: + default: "cpu-faster-whisper" nvidia: "cuda12-faster-whisper" intel: "intel-faster-whisper" amd: "rocm-faster-whisper" metal: "metal-faster-whisper" nvidia-cuda-13: "cuda13-faster-whisper" nvidia-cuda-12: "cuda12-faster-whisper" + nvidia-l4t: "nvidia-l4t-arm64-faster-whisper" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-faster-whisper" - &moonshine description: | Moonshine is a fast, accurate, and efficient speech-to-text transcription model using ONNX Runtime. @@ -446,6 +449,8 @@ default: "cpu-whisperx" nvidia-cuda-13: "cuda13-whisperx" nvidia-cuda-12: "cuda12-whisperx" + nvidia-l4t: "nvidia-l4t-arm64-whisperx" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisperx" - &kokoro icon: https://avatars.githubusercontent.com/u/166769057?v=4 description: | @@ -2082,11 +2087,13 @@ - !!merge <<: *faster-whisper name: "faster-whisper-development" capabilities: + default: "cpu-faster-whisper-development" nvidia: "cuda12-faster-whisper-development" intel: "intel-faster-whisper-development" amd: "rocm-faster-whisper-development" metal: "metal-faster-whisper-development" nvidia-cuda-13: "cuda13-faster-whisper-development" + nvidia-l4t: "nvidia-l4t-arm64-faster-whisper-development" - !!merge <<: *faster-whisper name: "cuda12-faster-whisper-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper" @@ -2127,6 +2134,36 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-faster-whisper" mirrors: - localai/localai-backends:master-metal-darwin-arm64-faster-whisper +- !!merge <<: *faster-whisper + name: "cuda12-faster-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-faster-whisper" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-12-faster-whisper +- !!merge <<: *faster-whisper + name: "rocm-faster-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-faster-whisper" + mirrors: + - localai/localai-backends:latest-gpu-rocm-hipblas-faster-whisper +- !!merge <<: *faster-whisper + name: "cpu-faster-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-faster-whisper" + mirrors: + - localai/localai-backends:latest-cpu-faster-whisper +- !!merge <<: *faster-whisper + name: "cpu-faster-whisper-development" + uri: "quay.io/go-skynet/local-ai-backends:master-cpu-faster-whisper" + mirrors: + - localai/localai-backends:master-cpu-faster-whisper +- !!merge <<: *faster-whisper + name: "nvidia-l4t-arm64-faster-whisper" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-faster-whisper" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-faster-whisper +- !!merge <<: *faster-whisper + name: "nvidia-l4t-arm64-faster-whisper-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-faster-whisper" + mirrors: + - localai/localai-backends:master-nvidia-l4t-faster-whisper ## moonshine - !!merge <<: *moonshine name: "moonshine-development" @@ -2185,6 +2222,7 @@ default: "cpu-whisperx-development" nvidia-cuda-13: "cuda13-whisperx-development" nvidia-cuda-12: "cuda12-whisperx-development" + nvidia-l4t: "nvidia-l4t-arm64-whisperx-development" - !!merge <<: *whisperx name: "cpu-whisperx" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisperx" @@ -2235,6 +2273,16 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisperx" mirrors: - localai/localai-backends:master-metal-darwin-arm64-whisperx +- !!merge <<: *whisperx + name: "nvidia-l4t-arm64-whisperx" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-whisperx" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-whisperx +- !!merge <<: *whisperx + name: "nvidia-l4t-arm64-whisperx-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-whisperx" + mirrors: + - localai/localai-backends:master-nvidia-l4t-whisperx ## coqui - !!merge <<: *coqui diff --git a/backend/python/faster-whisper/install.sh b/backend/python/faster-whisper/install.sh index 32befa8e6..c93dfe0b5 100755 --- a/backend/python/faster-whisper/install.sh +++ b/backend/python/faster-whisper/install.sh @@ -16,4 +16,14 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" fi +if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then + PYTHON_VERSION="3.12" + PYTHON_PATCH="12" + PY_STANDALONE_TAG="20251120" +fi + +if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then + USE_PIP=true +fi + installRequirements diff --git a/backend/python/faster-whisper/requirements-l4t12.txt b/backend/python/faster-whisper/requirements-l4t12.txt new file mode 100644 index 000000000..64805a359 --- /dev/null +++ b/backend/python/faster-whisper/requirements-l4t12.txt @@ -0,0 +1,3 @@ +--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/ +torch +faster-whisper diff --git a/backend/python/faster-whisper/requirements-l4t13.txt b/backend/python/faster-whisper/requirements-l4t13.txt new file mode 100644 index 000000000..f7438cf20 --- /dev/null +++ b/backend/python/faster-whisper/requirements-l4t13.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch +faster-whisper diff --git a/backend/python/whisperx/install.sh b/backend/python/whisperx/install.sh index 7a3490001..c5a5e7cef 100755 --- a/backend/python/whisperx/install.sh +++ b/backend/python/whisperx/install.sh @@ -8,8 +8,21 @@ else source $backend_dir/../common/libbackend.sh fi -if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match" +if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then + PYTHON_VERSION="3.12" + PYTHON_PATCH="12" + PY_STANDALONE_TAG="20251120" +fi + +if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then + USE_PIP=true +fi + +# --index-strategy is a uv-only flag; skip it when using pip +if [ "x${USE_PIP}" != "xtrue" ]; then + if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then + EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match" + fi fi installRequirements diff --git a/backend/python/whisperx/requirements-l4t12.txt b/backend/python/whisperx/requirements-l4t12.txt new file mode 100644 index 000000000..45ef64b99 --- /dev/null +++ b/backend/python/whisperx/requirements-l4t12.txt @@ -0,0 +1,3 @@ +--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/ +torch +whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/backend/python/whisperx/requirements-l4t13.txt b/backend/python/whisperx/requirements-l4t13.txt new file mode 100644 index 000000000..c0e918b87 --- /dev/null +++ b/backend/python/whisperx/requirements-l4t13.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch +whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/core/gallery/backend_types.go b/core/gallery/backend_types.go index 0fb6e7f24..02ac9b275 100644 --- a/core/gallery/backend_types.go +++ b/core/gallery/backend_types.go @@ -71,6 +71,10 @@ func (m *GalleryBackend) IsCompatibleWith(systemState *system.SystemState) bool return true } + if systemState.CapabilityFilterDisabled() { + return true + } + // Meta backends are compatible if the system capability matches one of the keys if m.IsMeta() { capability := systemState.Capability(m.CapabilitiesMap) diff --git a/pkg/system/capabilities.go b/pkg/system/capabilities.go index 9141d5929..4001ff38e 100644 --- a/pkg/system/capabilities.go +++ b/pkg/system/capabilities.go @@ -18,7 +18,8 @@ const ( Intel = "intel" // Private constants - only used within this package - defaultCapability = "default" + defaultCapability = "default" + disableCapability = "disable" nvidiaL4T = "nvidia-l4t" darwinX86 = "darwin-x86" metal = "metal" @@ -56,6 +57,12 @@ func init() { cuda12DirExists = err == nil } +// CapabilityFilterDisabled returns true when capability-based backend filtering +// is disabled via LOCALAI_FORCE_META_BACKEND_CAPABILITY=disable. +func (s *SystemState) CapabilityFilterDisabled() bool { + return s.getSystemCapabilities() == disableCapability +} + func (s *SystemState) Capability(capMap map[string]string) string { reportedCapability := s.getSystemCapabilities() @@ -196,6 +203,10 @@ func (s *SystemState) DetectedCapability() string { // with the current system capability. This function uses getSystemCapabilities to ensure // consistency with capability detection (including VRAM checks, environment overrides, etc.). func (s *SystemState) IsBackendCompatible(name, uri string) bool { + if s.CapabilityFilterDisabled() { + return true + } + combined := strings.ToLower(name + " " + uri) capability := s.getSystemCapabilities() diff --git a/pkg/system/capabilities_test.go b/pkg/system/capabilities_test.go index a267cf611..eb3a459b5 100644 --- a/pkg/system/capabilities_test.go +++ b/pkg/system/capabilities_test.go @@ -127,3 +127,41 @@ var _ = Describe("getSystemCapabilities", func() { }), ) }) + +var _ = Describe("CapabilityFilterDisabled", func() { + var origEnv string + + BeforeEach(func() { + origEnv = os.Getenv(capabilityEnv) + }) + + AfterEach(func() { + if origEnv != "" { + os.Setenv(capabilityEnv, origEnv) + } else { + os.Unsetenv(capabilityEnv) + } + }) + + It("returns true when capability is set to disable", func() { + os.Setenv(capabilityEnv, "disable") + s := &SystemState{} + Expect(s.CapabilityFilterDisabled()).To(BeTrue()) + }) + + It("returns false when capability is not set to disable", func() { + os.Setenv(capabilityEnv, "nvidia") + s := &SystemState{} + Expect(s.CapabilityFilterDisabled()).To(BeFalse()) + }) + + It("makes IsBackendCompatible return true for all backends when disabled", func() { + os.Setenv(capabilityEnv, "disable") + s := &SystemState{} + Expect(s.IsBackendCompatible("cuda12-whisperx", "quay.io/nvidia-cuda-12")).To(BeTrue()) + Expect(s.IsBackendCompatible("rocm-whisperx", "quay.io/rocm")).To(BeTrue()) + Expect(s.IsBackendCompatible("metal-whisperx", "quay.io/metal-darwin")).To(BeTrue()) + Expect(s.IsBackendCompatible("intel-whisperx", "quay.io/intel-sycl")).To(BeTrue()) + Expect(s.IsBackendCompatible("cpu-whisperx", "quay.io/cpu")).To(BeTrue()) + }) +})