chore(whisperx): drop ROCm/hipblas build target (#9474)
whisperx has no upstream AMD GPU support, and its core transcription path (faster-whisper -> ctranslate2) falls back to CPU on AMD because the PyPI ctranslate2 wheel is CUDA-only. The torch ROCm wheels would accelerate only the alignment/diarization stages, producing a misleadingly half-working image.

Drop the hipblas variant rather than shipping a partially accelerated build that users can't distinguish from the real thing. AMD hosts now fall through the capability map to cpu-whisperx / cpu-whisperx-development.

Also removes the now-dangling rocm-whisperx assertion from pkg/system/capabilities_test.go and the ROCm mention from the whisperx row in docs/content/reference/compatibility-table.md.

Assisted-by: Claude Code:claude-opus-4-7
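For context on the fallback mentioned above: the gallery's capability map resolves one backend per detected vendor and falls back to the `default` entry when a vendor key is absent. A minimal Go sketch of that lookup, using the post-change whisperx entry; `pickBackend` is a hypothetical helper for illustration, not LocalAI's actual API:

```go
package main

import "fmt"

// capabilities mirrors the whisperx gallery entry after this change: no "amd" key.
var capabilities = map[string]string{
	"nvidia":         "cuda12-whisperx",
	"metal":          "metal-whisperx",
	"default":        "cpu-whisperx",
	"nvidia-cuda-13": "cuda13-whisperx",
}

// pickBackend returns the backend for a detected GPU vendor, falling back to
// the "default" entry when the vendor has no dedicated build.
func pickBackend(vendor string) string {
	if b, ok := capabilities[vendor]; ok {
		return b
	}
	return capabilities["default"]
}

func main() {
	// AMD now falls through to the CPU image: prints "cpu-whisperx".
	fmt.Println(pickBackend("amd"))
}
```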
committed by GitHub
parent a7dbb2a83d
commit 39573ecd2a
13 .github/workflows/backend.yml
@@ -1624,19 +1624,6 @@ jobs:
             dockerfile: "./backend/Dockerfile.python"
             context: "./"
             ubuntu-version: '2404'
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-whisperx'
-            runs-on: 'bigger-runner'
-            base-image: "rocm/dev-ubuntu-24.04:7.2.1"
-            skip-drivers: 'false'
-            backend: "whisperx"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./"
-            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -587,7 +587,6 @@
   alias: "whisperx"
   capabilities:
     nvidia: "cuda12-whisperx"
-    amd: "rocm-whisperx"
     metal: "metal-whisperx"
     default: "cpu-whisperx"
     nvidia-cuda-13: "cuda13-whisperx"
@@ -2745,7 +2744,6 @@
   name: "whisperx-development"
   capabilities:
     nvidia: "cuda12-whisperx-development"
-    amd: "rocm-whisperx-development"
     metal: "metal-whisperx-development"
     default: "cpu-whisperx-development"
     nvidia-cuda-13: "cuda13-whisperx-development"
@@ -2771,16 +2769,6 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisperx"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-12-whisperx
-- !!merge <<: *whisperx
-  name: "rocm-whisperx"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-whisperx"
-  mirrors:
-    - localai/localai-backends:latest-gpu-rocm-hipblas-whisperx
-- !!merge <<: *whisperx
-  name: "rocm-whisperx-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-whisperx"
-  mirrors:
-    - localai/localai-backends:master-gpu-rocm-hipblas-whisperx
 - !!merge <<: *whisperx
   name: "cuda13-whisperx"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-whisperx"
@@ -1,6 +0,0 @@
-# whisperx hard-pins torch~=2.8.0, which is not available in the rocm7.x indexes
-# (they start at torch 2.10). Keep rocm6.4 wheels here — they still load against
-# the rocm7.2.1 runtime via AMD's forward-compatibility window.
---extra-index-url https://download.pytorch.org/whl/rocm6.4
-torch==2.8.0+rocm6.4
-whisperx @ git+https://github.com/m-bain/whisperX.git
@@ -33,7 +33,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 |---------|-------------|-------------|
 | [whisper.cpp](https://github.com/ggml-org/whisper.cpp) | OpenAI Whisper in C/C++ | CPU, CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, Jetson L4T |
 | [faster-whisper](https://github.com/SYSTRAN/faster-whisper) | Fast Whisper with CTranslate2 | CUDA 12/13, ROCm, Intel, Metal |
-| [WhisperX](https://github.com/m-bain/whisperX) | Word-level timestamps and speaker diarization | CPU, CUDA 12/13, ROCm, Metal |
+| [WhisperX](https://github.com/m-bain/whisperX) | Word-level timestamps and speaker diarization | CPU, CUDA 12/13, Metal |
 | [moonshine](https://github.com/moonshine-ai/moonshine) | Ultra-fast transcription for low-end devices | CPU, CUDA 12/13, Metal |
 | [voxtral](https://github.com/mudler/voxtral.c) | Voxtral Realtime 4B speech-to-text in C | CPU, Metal |
 | [Qwen3-ASR](https://github.com/QwenLM/Qwen3-ASR) | Qwen3 automatic speech recognition | CPU, CUDA 12/13, ROCm, Intel, Metal, Jetson L4T |
@@ -159,7 +159,6 @@ var _ = Describe("CapabilityFilterDisabled", func() {
 		os.Setenv(capabilityEnv, "disable")
 		s := &SystemState{}
 		Expect(s.IsBackendCompatible("cuda12-whisperx", "quay.io/nvidia-cuda-12")).To(BeTrue())
-		Expect(s.IsBackendCompatible("rocm-whisperx", "quay.io/rocm")).To(BeTrue())
 		Expect(s.IsBackendCompatible("metal-whisperx", "quay.io/metal-darwin")).To(BeTrue())
 		Expect(s.IsBackendCompatible("intel-whisperx", "quay.io/intel-sycl")).To(BeTrue())
 		Expect(s.IsBackendCompatible("cpu-whisperx", "quay.io/cpu")).To(BeTrue())