mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-16 12:59:33 -04:00
fix: try to add whisperx and faster-whisper for more variants (#9278)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
285f7d4340
commit
e00ce981f0
65
.github/workflows/backend.yml
vendored
65
.github/workflows/backend.yml
vendored
@@ -105,6 +105,19 @@ jobs:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-faster-whisper'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'true'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -965,6 +978,32 @@ jobs:
|
||||
backend: "mlx-distributed"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
- build-type: 'l4t'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-cuda-13-arm64-whisperx'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
ubuntu-version: '2404'
|
||||
backend: "whisperx"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
- build-type: 'l4t'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-cuda-13-arm64-faster-whisper'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
ubuntu-version: '2404'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
@@ -1644,6 +1683,32 @@ jobs:
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
- build-type: 'l4t'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-whisperx'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
skip-drivers: 'true'
|
||||
backend: "whisperx"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
- build-type: 'l4t'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-faster-whisper'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
skip-drivers: 'true'
|
||||
backend: "faster-whisper"
|
||||
dockerfile: "./backend/Dockerfile.python"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
# SYCL additional backends
|
||||
- build-type: 'intel'
|
||||
cuda-major-version: ""
|
||||
|
||||
2
Makefile
2
Makefile
@@ -428,6 +428,7 @@ prepare-test-extra: protogen-python
|
||||
$(MAKE) -C backend/python/qwen-asr
|
||||
$(MAKE) -C backend/python/nemo
|
||||
$(MAKE) -C backend/python/voxcpm
|
||||
$(MAKE) -C backend/python/faster-whisper
|
||||
$(MAKE) -C backend/python/whisperx
|
||||
$(MAKE) -C backend/python/ace-step
|
||||
$(MAKE) -C backend/python/trl
|
||||
@@ -449,6 +450,7 @@ test-extra: prepare-test-extra
|
||||
$(MAKE) -C backend/python/qwen-asr test
|
||||
$(MAKE) -C backend/python/nemo test
|
||||
$(MAKE) -C backend/python/voxcpm test
|
||||
$(MAKE) -C backend/python/faster-whisper test
|
||||
$(MAKE) -C backend/python/whisperx test
|
||||
$(MAKE) -C backend/python/ace-step test
|
||||
$(MAKE) -C backend/python/trl test
|
||||
|
||||
@@ -400,12 +400,15 @@
|
||||
license: MIT
|
||||
name: "faster-whisper"
|
||||
capabilities:
|
||||
default: "cpu-faster-whisper"
|
||||
nvidia: "cuda12-faster-whisper"
|
||||
intel: "intel-faster-whisper"
|
||||
amd: "rocm-faster-whisper"
|
||||
metal: "metal-faster-whisper"
|
||||
nvidia-cuda-13: "cuda13-faster-whisper"
|
||||
nvidia-cuda-12: "cuda12-faster-whisper"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-faster-whisper"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-faster-whisper"
|
||||
- &moonshine
|
||||
description: |
|
||||
Moonshine is a fast, accurate, and efficient speech-to-text transcription model using ONNX Runtime.
|
||||
@@ -446,6 +449,8 @@
|
||||
default: "cpu-whisperx"
|
||||
nvidia-cuda-13: "cuda13-whisperx"
|
||||
nvidia-cuda-12: "cuda12-whisperx"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-whisperx"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-whisperx"
|
||||
- &kokoro
|
||||
icon: https://avatars.githubusercontent.com/u/166769057?v=4
|
||||
description: |
|
||||
@@ -2082,11 +2087,13 @@
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "faster-whisper-development"
|
||||
capabilities:
|
||||
default: "cpu-faster-whisper-development"
|
||||
nvidia: "cuda12-faster-whisper-development"
|
||||
intel: "intel-faster-whisper-development"
|
||||
amd: "rocm-faster-whisper-development"
|
||||
metal: "metal-faster-whisper-development"
|
||||
nvidia-cuda-13: "cuda13-faster-whisper-development"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-faster-whisper-development"
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "cuda12-faster-whisper-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper"
|
||||
@@ -2127,6 +2134,36 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "cuda12-faster-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-12-faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "rocm-faster-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-rocm-hipblas-faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "cpu-faster-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-cpu-faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "cpu-faster-whisper-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "nvidia-l4t-arm64-faster-whisper"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-faster-whisper
|
||||
- !!merge <<: *faster-whisper
|
||||
name: "nvidia-l4t-arm64-faster-whisper-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-faster-whisper"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-faster-whisper
|
||||
## moonshine
|
||||
- !!merge <<: *moonshine
|
||||
name: "moonshine-development"
|
||||
@@ -2185,6 +2222,7 @@
|
||||
default: "cpu-whisperx-development"
|
||||
nvidia-cuda-13: "cuda13-whisperx-development"
|
||||
nvidia-cuda-12: "cuda12-whisperx-development"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-whisperx-development"
|
||||
- !!merge <<: *whisperx
|
||||
name: "cpu-whisperx"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisperx"
|
||||
@@ -2235,6 +2273,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisperx"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-whisperx
|
||||
- !!merge <<: *whisperx
|
||||
name: "nvidia-l4t-arm64-whisperx"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-whisperx"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-whisperx
|
||||
- !!merge <<: *whisperx
|
||||
name: "nvidia-l4t-arm64-whisperx-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-whisperx"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-whisperx
|
||||
## coqui
|
||||
|
||||
- !!merge <<: *coqui
|
||||
|
||||
@@ -16,4 +16,14 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
||||
PYTHON_VERSION="3.12"
|
||||
PYTHON_PATCH="12"
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
fi
|
||||
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||
USE_PIP=true
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
|
||||
3
backend/python/faster-whisper/requirements-l4t12.txt
Normal file
3
backend/python/faster-whisper/requirements-l4t12.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
|
||||
torch
|
||||
faster-whisper
|
||||
3
backend/python/faster-whisper/requirements-l4t13.txt
Normal file
3
backend/python/faster-whisper/requirements-l4t13.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch
|
||||
faster-whisper
|
||||
@@ -8,8 +8,21 @@ else
|
||||
source $backend_dir/../common/libbackend.sh
|
||||
fi
|
||||
|
||||
if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
||||
PYTHON_VERSION="3.12"
|
||||
PYTHON_PATCH="12"
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
fi
|
||||
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||
USE_PIP=true
|
||||
fi
|
||||
|
||||
# --index-strategy is a uv-only flag; skip it when using pip
|
||||
if [ "x${USE_PIP}" != "xtrue" ]; then
|
||||
if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
|
||||
fi
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
|
||||
3
backend/python/whisperx/requirements-l4t12.txt
Normal file
3
backend/python/whisperx/requirements-l4t12.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
|
||||
torch
|
||||
whisperx @ git+https://github.com/m-bain/whisperX.git
|
||||
3
backend/python/whisperx/requirements-l4t13.txt
Normal file
3
backend/python/whisperx/requirements-l4t13.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu130
|
||||
torch
|
||||
whisperx @ git+https://github.com/m-bain/whisperX.git
|
||||
@@ -71,6 +71,10 @@ func (m *GalleryBackend) IsCompatibleWith(systemState *system.SystemState) bool
|
||||
return true
|
||||
}
|
||||
|
||||
if systemState.CapabilityFilterDisabled() {
|
||||
return true
|
||||
}
|
||||
|
||||
// Meta backends are compatible if the system capability matches one of the keys
|
||||
if m.IsMeta() {
|
||||
capability := systemState.Capability(m.CapabilitiesMap)
|
||||
|
||||
@@ -18,7 +18,8 @@ const (
|
||||
Intel = "intel"
|
||||
|
||||
// Private constants - only used within this package
|
||||
defaultCapability = "default"
|
||||
defaultCapability = "default"
|
||||
disableCapability = "disable"
|
||||
nvidiaL4T = "nvidia-l4t"
|
||||
darwinX86 = "darwin-x86"
|
||||
metal = "metal"
|
||||
@@ -56,6 +57,12 @@ func init() {
|
||||
cuda12DirExists = err == nil
|
||||
}
|
||||
|
||||
// CapabilityFilterDisabled returns true when capability-based backend filtering
|
||||
// is disabled via LOCALAI_FORCE_META_BACKEND_CAPABILITY=disable.
// Concretely, it reports whether getSystemCapabilities() equals
// disableCapability (the string "disable").
|
||||
func (s *SystemState) CapabilityFilterDisabled() bool {
|
||||
return s.getSystemCapabilities() == disableCapability
|
||||
}
|
||||
|
||||
func (s *SystemState) Capability(capMap map[string]string) string {
|
||||
reportedCapability := s.getSystemCapabilities()
|
||||
|
||||
@@ -196,6 +203,10 @@ func (s *SystemState) DetectedCapability() string {
|
||||
// with the current system capability. This function uses getSystemCapabilities to ensure
|
||||
// consistency with capability detection (including VRAM checks, environment overrides, etc.).
|
||||
func (s *SystemState) IsBackendCompatible(name, uri string) bool {
|
||||
if s.CapabilityFilterDisabled() {
|
||||
return true
|
||||
}
|
||||
|
||||
combined := strings.ToLower(name + " " + uri)
|
||||
capability := s.getSystemCapabilities()
|
||||
|
||||
|
||||
@@ -127,3 +127,41 @@ var _ = Describe("getSystemCapabilities", func() {
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
// Specs for SystemState.CapabilityFilterDisabled and for its effect on
// SystemState.IsBackendCompatible. Each spec drives the behavior through the
// environment variable named by capabilityEnv.
var _ = Describe("CapabilityFilterDisabled", func() {
|
||||
// origEnv holds the value of capabilityEnv captured before each spec so
// that AfterEach can put it back.
var origEnv string
|
||||
|
||||
BeforeEach(func() {
|
||||
origEnv = os.Getenv(capabilityEnv)
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
// Restore the captured value. os.Getenv returns "" for both an unset and
// an empty variable, so either case ends up unset again here.
if origEnv != "" {
|
||||
os.Setenv(capabilityEnv, origEnv)
|
||||
} else {
|
||||
os.Unsetenv(capabilityEnv)
|
||||
}
|
||||
})
|
||||
|
||||
It("returns true when capability is set to disable", func() {
|
||||
os.Setenv(capabilityEnv, "disable")
|
||||
s := &SystemState{}
|
||||
Expect(s.CapabilityFilterDisabled()).To(BeTrue())
|
||||
})
|
||||
|
||||
It("returns false when capability is not set to disable", func() {
|
||||
// A forced capability other than "disable" must not turn the filter off.
os.Setenv(capabilityEnv, "nvidia")
|
||||
s := &SystemState{}
|
||||
Expect(s.CapabilityFilterDisabled()).To(BeFalse())
|
||||
})
|
||||
|
||||
It("makes IsBackendCompatible return true for all backends when disabled", func() {
|
||||
os.Setenv(capabilityEnv, "disable")
|
||||
s := &SystemState{}
|
||||
// With the filter disabled, backends named for CUDA, ROCm, Metal, Intel and
// CPU are all expected to be reported as compatible.
Expect(s.IsBackendCompatible("cuda12-whisperx", "quay.io/nvidia-cuda-12")).To(BeTrue())
|
||||
Expect(s.IsBackendCompatible("rocm-whisperx", "quay.io/rocm")).To(BeTrue())
|
||||
Expect(s.IsBackendCompatible("metal-whisperx", "quay.io/metal-darwin")).To(BeTrue())
|
||||
Expect(s.IsBackendCompatible("intel-whisperx", "quay.io/intel-sycl")).To(BeTrue())
|
||||
Expect(s.IsBackendCompatible("cpu-whisperx", "quay.io/cpu")).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user