From e169492543d575d2db56f1ac3c145242045f716e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Thu, 26 Feb 2026 23:08:24 +0000
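Subject: [PATCH] fix(faster-qwen3-tts): this backend is CUDA only

Drop the non-CUDA variants of the faster-qwen3-tts backend:

- remove the hipblas (ROCm), intel, CPU and Metal (mps) entries from the
  backend build matrix in .github/workflows/backend.yml
- remove the tests-faster-qwen3-tts job (ubuntu-latest runner) from
  .github/workflows/test-extra.yml
- remove the cpu-, intel-, rocm- and metal- gallery entries from
  backend/index.yaml and point the "default" capability at the CUDA 12
  image
- delete requirements-{cpu,hipblas,intel,mps}.txt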

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 .github/workflows/backend.yml                 | 42 ---------------
 .github/workflows/test-extra.yml              | 19 -------
 backend/index.yaml                            | 54 ++-----------------
 .../faster-qwen3-tts/requirements-cpu.txt     |  4 --
 .../faster-qwen3-tts/requirements-hipblas.txt |  4 --
 .../faster-qwen3-tts/requirements-intel.txt   |  4 --
 .../faster-qwen3-tts/requirements-mps.txt     |  3 --
 7 files changed, 4 insertions(+), 126 deletions(-)
 delete mode 100644 backend/python/faster-qwen3-tts/requirements-cpu.txt
 delete mode 100644 backend/python/faster-qwen3-tts/requirements-hipblas.txt
 delete mode 100644 backend/python/faster-qwen3-tts/requirements-intel.txt
 delete mode 100644 backend/python/faster-qwen3-tts/requirements-mps.txt

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index c33d69f72..b9f9da409 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -1149,19 +1149,6 @@ jobs:
             dockerfile: "./backend/Dockerfile.python"
             context: "./"
             ubuntu-version: '2404'
-          - build-type: 'hipblas'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-rocm-hipblas-faster-qwen3-tts'
-            runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
-            skip-drivers: 'false'
-            backend: "faster-qwen3-tts"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./"
-            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1515,19 +1502,6 @@ jobs:
             dockerfile: "./backend/Dockerfile.python"
             context: "./"
             ubuntu-version: '2404'
-          - build-type: 'intel'
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-intel-faster-qwen3-tts'
-            runs-on: 'arc-runner-set'
-            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
-            skip-drivers: 'false'
-            backend: "faster-qwen3-tts"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./"
-            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1980,19 +1954,6 @@ jobs:
             dockerfile: "./backend/Dockerfile.python"
             context: "./"
             ubuntu-version: '2404'
-          - build-type: ''
-            cuda-major-version: ""
-            cuda-minor-version: ""
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-cpu-faster-qwen3-tts'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:24.04"
-            skip-drivers: 'false'
-            backend: "faster-qwen3-tts"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./"
-            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -2079,9 +2040,6 @@ jobs:
           - backend: "qwen-tts"
             tag-suffix: "-metal-darwin-arm64-qwen-tts"
             build-type: "mps"
-          - backend: "faster-qwen3-tts"
-            tag-suffix: "-metal-darwin-arm64-faster-qwen3-tts"
-            build-type: "mps"
           - backend: "voxcpm"
             tag-suffix: "-metal-darwin-arm64-voxcpm"
             build-type: "mps"
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index c3cb8abcc..fee41fe7f 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -304,25 +304,6 @@ jobs:
         run: |
           make --jobs=5 --output-sync=target -C backend/python/qwen-tts
           make --jobs=5 --output-sync=target -C backend/python/qwen-tts test
-  tests-faster-qwen3-tts:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y build-essential ffmpeg
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-      - name: Test faster-qwen3-tts
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/faster-qwen3-tts
-          make --jobs=5 --output-sync=target -C backend/python/faster-qwen3-tts test
   tests-qwen-asr:
     runs-on: ubuntu-latest
     steps:
diff --git a/backend/index.yaml b/backend/index.yaml
index 1ed53165f..e518170ca 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -543,13 +543,10 @@
   alias: "faster-qwen3-tts"
   capabilities:
     nvidia: "cuda12-faster-qwen3-tts"
-    intel: "intel-faster-qwen3-tts"
-    amd: "rocm-faster-qwen3-tts"
-    nvidia-l4t: "nvidia-l4t-faster-qwen3-tts"
-    metal: "metal-faster-qwen3-tts"
-    default: "cpu-faster-qwen3-tts"
+    default: "cuda12-faster-qwen3-tts"
     nvidia-cuda-13: "cuda13-faster-qwen3-tts"
     nvidia-cuda-12: "cuda12-faster-qwen3-tts"
+    nvidia-l4t: "nvidia-l4t-faster-qwen3-tts"
     nvidia-l4t-cuda-12: "nvidia-l4t-faster-qwen3-tts"
     nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-faster-qwen3-tts"
   icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png
@@ -2309,25 +2306,12 @@
   name: "faster-qwen3-tts-development"
   capabilities:
     nvidia: "cuda12-faster-qwen3-tts-development"
-    intel: "intel-faster-qwen3-tts-development"
-    amd: "rocm-faster-qwen3-tts-development"
-    nvidia-l4t: "nvidia-l4t-faster-qwen3-tts-development"
-    metal: "metal-faster-qwen3-tts-development"
-    default: "cpu-faster-qwen3-tts-development"
+    default: "cuda12-faster-qwen3-tts-development"
     nvidia-cuda-13: "cuda13-faster-qwen3-tts-development"
     nvidia-cuda-12: "cuda12-faster-qwen3-tts-development"
+    nvidia-l4t: "nvidia-l4t-faster-qwen3-tts-development"
     nvidia-l4t-cuda-12: "nvidia-l4t-faster-qwen3-tts-development"
     nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-faster-qwen3-tts-development"
-- !!merge <<: *faster-qwen3-tts
-  name: "cpu-faster-qwen3-tts"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-faster-qwen3-tts"
-  mirrors:
-    - localai/localai-backends:latest-cpu-faster-qwen3-tts
-- !!merge <<: *faster-qwen3-tts
-  name: "cpu-faster-qwen3-tts-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-faster-qwen3-tts"
-  mirrors:
-    - localai/localai-backends:master-cpu-faster-qwen3-tts
 - !!merge <<: *faster-qwen3-tts
   name: "cuda12-faster-qwen3-tts"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-faster-qwen3-tts"
@@ -2348,26 +2332,6 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-faster-qwen3-tts"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-faster-qwen3-tts
-- !!merge <<: *faster-qwen3-tts
-  name: "intel-faster-qwen3-tts"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-faster-qwen3-tts"
-  mirrors:
-    - localai/localai-backends:latest-gpu-intel-faster-qwen3-tts
-- !!merge <<: *faster-qwen3-tts
-  name: "intel-faster-qwen3-tts-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-faster-qwen3-tts"
-  mirrors:
-    - localai/localai-backends:master-gpu-intel-faster-qwen3-tts
-- !!merge <<: *faster-qwen3-tts
-  name: "rocm-faster-qwen3-tts"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-faster-qwen3-tts"
-  mirrors:
-    - localai/localai-backends:latest-gpu-rocm-hipblas-faster-qwen3-tts
-- !!merge <<: *faster-qwen3-tts
-  name: "rocm-faster-qwen3-tts-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-faster-qwen3-tts"
-  mirrors:
-    - localai/localai-backends:master-gpu-rocm-hipblas-faster-qwen3-tts
 - !!merge <<: *faster-qwen3-tts
   name: "nvidia-l4t-faster-qwen3-tts"
   uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-faster-qwen3-tts"
@@ -2388,16 +2352,6 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-faster-qwen3-tts"
   mirrors:
     - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-faster-qwen3-tts
-- !!merge <<: *faster-qwen3-tts
-  name: "metal-faster-qwen3-tts"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-faster-qwen3-tts"
-  mirrors:
-    - localai/localai-backends:latest-metal-darwin-arm64-faster-qwen3-tts
-- !!merge <<: *faster-qwen3-tts
-  name: "metal-faster-qwen3-tts-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-faster-qwen3-tts"
-  mirrors:
-    - localai/localai-backends:master-metal-darwin-arm64-faster-qwen3-tts
 ## qwen-asr
 - !!merge <<: *qwen-asr
   name: "qwen-asr-development"
diff --git a/backend/python/faster-qwen3-tts/requirements-cpu.txt b/backend/python/faster-qwen3-tts/requirements-cpu.txt
deleted file mode 100644
index cef3137a1..000000000
--- a/backend/python/faster-qwen3-tts/requirements-cpu.txt
+++ /dev/null
@@ -1,4 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cpu
-torch
-torchaudio
-faster-qwen3-tts
diff --git a/backend/python/faster-qwen3-tts/requirements-hipblas.txt b/backend/python/faster-qwen3-tts/requirements-hipblas.txt
deleted file mode 100644
index 10524cf84..000000000
--- a/backend/python/faster-qwen3-tts/requirements-hipblas.txt
+++ /dev/null
@@ -1,4 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
-torchaudio==2.7.1+rocm6.3
-faster-qwen3-tts
diff --git a/backend/python/faster-qwen3-tts/requirements-intel.txt b/backend/python/faster-qwen3-tts/requirements-intel.txt
deleted file mode 100644
index d6f3433bf..000000000
--- a/backend/python/faster-qwen3-tts/requirements-intel.txt
+++ /dev/null
@@ -1,4 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/xpu
-torch
-torchaudio
-faster-qwen3-tts
diff --git a/backend/python/faster-qwen3-tts/requirements-mps.txt b/backend/python/faster-qwen3-tts/requirements-mps.txt
deleted file mode 100644
index 21b7a4eab..000000000
--- a/backend/python/faster-qwen3-tts/requirements-mps.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-torch
-torchaudio
-faster-qwen3-tts