From dfc6efb88d7188d008abd99537df3c5563fc89f4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 27 Feb 2026 08:16:51 +0100 Subject: [PATCH] feat(backends): add faster-qwen3-tts (#8664) * feat(backends): add faster-qwen3-tts Signed-off-by: Ettore Di Giacinto * fix: this backend is CUDA only Signed-off-by: Ettore Di Giacinto * fix: add requirements-install.txt with setuptools for build isolation The faster-qwen3-tts backend requires setuptools to build packages like sox that have setuptools as a build dependency. This ensures the build completes successfully in CI. Signed-off-by: LocalAI Bot --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: LocalAI Bot Co-authored-by: Ettore Di Giacinto --- .github/workflows/backend.yml | 54 ++++- Makefile | 8 +- backend/index.yaml | 73 +++++++ backend/python/faster-qwen3-tts/Makefile | 23 +++ backend/python/faster-qwen3-tts/backend.py | 193 ++++++++++++++++++ backend/python/faster-qwen3-tts/install.sh | 13 ++ .../requirements-cublas12.txt | 4 + .../requirements-cublas13.txt | 4 + .../faster-qwen3-tts/requirements-install.txt | 1 + .../faster-qwen3-tts/requirements-l4t12.txt | 4 + .../faster-qwen3-tts/requirements-l4t13.txt | 4 + .../python/faster-qwen3-tts/requirements.txt | 8 + backend/python/faster-qwen3-tts/run.sh | 9 + backend/python/faster-qwen3-tts/test.py | 104 ++++++++++ backend/python/faster-qwen3-tts/test.sh | 11 + 15 files changed, 510 insertions(+), 3 deletions(-) create mode 100644 backend/python/faster-qwen3-tts/Makefile create mode 100644 backend/python/faster-qwen3-tts/backend.py create mode 100644 backend/python/faster-qwen3-tts/install.sh create mode 100644 backend/python/faster-qwen3-tts/requirements-cublas12.txt create mode 100644 backend/python/faster-qwen3-tts/requirements-cublas13.txt create mode 100644 backend/python/faster-qwen3-tts/requirements-install.txt create mode 100644 backend/python/faster-qwen3-tts/requirements-l4t12.txt create mode 
100644 backend/python/faster-qwen3-tts/requirements-l4t13.txt create mode 100644 backend/python/faster-qwen3-tts/requirements.txt create mode 100644 backend/python/faster-qwen3-tts/run.sh create mode 100644 backend/python/faster-qwen3-tts/test.py create mode 100644 backend/python/faster-qwen3-tts/test.sh diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 18ced610a..b9f9da409 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -210,6 +210,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "8" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-faster-qwen3-tts' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "faster-qwen3-tts" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "8" @@ -575,6 +588,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-faster-qwen3-tts' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "faster-qwen3-tts" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -705,6 +731,19 @@ jobs: backend: "qwen-tts" dockerfile: "./backend/Dockerfile.python" context: "./" + - build-type: 'l4t' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-faster-qwen3-tts' + runs-on: 'ubuntu-24.04-arm' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + ubuntu-version: '2404' + backend: 
"faster-qwen3-tts" + dockerfile: "./backend/Dockerfile.python" + context: "./" - build-type: 'l4t' cuda-major-version: "13" cuda-minor-version: "0" @@ -1306,6 +1345,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2204' + - build-type: 'l4t' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-faster-qwen3-tts' + runs-on: 'ubuntu-24.04-arm' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + skip-drivers: 'true' + backend: "faster-qwen3-tts" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2204' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1905,7 +1957,7 @@ jobs: - build-type: '' cuda-major-version: "" cuda-minor-version: "" - platforms: 'linux/amd64' + platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '-cpu-voxcpm' runs-on: 'ubuntu-latest' diff --git a/Makefile b/Makefile index daefa00c7..704124c9a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/voxtral +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store 
backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/voxtral GOCMD=go GOTEST=$(GOCMD) test @@ -317,6 +317,7 @@ prepare-test-extra: protogen-python $(MAKE) -C backend/python/moonshine $(MAKE) -C backend/python/pocket-tts $(MAKE) -C backend/python/qwen-tts + $(MAKE) -C backend/python/faster-qwen3-tts $(MAKE) -C backend/python/qwen-asr $(MAKE) -C backend/python/nemo $(MAKE) -C backend/python/voxcpm @@ -334,6 +335,7 @@ test-extra: prepare-test-extra $(MAKE) -C backend/python/moonshine test $(MAKE) -C backend/python/pocket-tts test $(MAKE) -C backend/python/qwen-tts test + $(MAKE) -C backend/python/faster-qwen3-tts test $(MAKE) -C backend/python/qwen-asr test $(MAKE) -C backend/python/nemo test $(MAKE) -C backend/python/voxcpm test @@ -473,6 +475,7 @@ BACKEND_VIBEVOICE = vibevoice|python|.|--progress=plain|true BACKEND_MOONSHINE = moonshine|python|.|false|true BACKEND_POCKET_TTS = pocket-tts|python|.|false|true BACKEND_QWEN_TTS = qwen-tts|python|.|false|true +BACKEND_FASTER_QWEN3_TTS = faster-qwen3-tts|python|.|false|true BACKEND_QWEN_ASR = qwen-asr|python|.|false|true BACKEND_NEMO = nemo|python|.|false|true BACKEND_VOXCPM = voxcpm|python|.|false|true @@ -525,6 +528,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE))) $(eval $(call generate-docker-build-target,$(BACKEND_MOONSHINE))) $(eval $(call generate-docker-build-target,$(BACKEND_POCKET_TTS))) $(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS))) +$(eval $(call generate-docker-build-target,$(BACKEND_FASTER_QWEN3_TTS))) $(eval $(call 
generate-docker-build-target,$(BACKEND_QWEN_ASR))) $(eval $(call generate-docker-build-target,$(BACKEND_NEMO))) $(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM))) @@ -535,7 +539,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP))) docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral +docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-voxtral ######################################################## ### Mock Backend for E2E Tests diff --git a/backend/index.yaml b/backend/index.yaml index 428454603..e518170ca 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -528,6 +528,28 @@ nvidia-l4t-cuda-12: "nvidia-l4t-qwen-tts" nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-qwen-tts" icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png +- &faster-qwen3-tts + urls: + - https://github.com/andimarafioti/faster-qwen3-tts + - https://pypi.org/project/faster-qwen3-tts/ + description: | + Real-time Qwen3-TTS inference using 
CUDA graph capture. Voice clone only; requires NVIDIA GPU with CUDA. + tags: + - text-to-speech + - TTS + - voice-clone + license: apache-2.0 + name: "faster-qwen3-tts" + alias: "faster-qwen3-tts" + capabilities: + nvidia: "cuda12-faster-qwen3-tts" + default: "cuda12-faster-qwen3-tts" + nvidia-cuda-13: "cuda13-faster-qwen3-tts" + nvidia-cuda-12: "cuda12-faster-qwen3-tts" + nvidia-l4t: "nvidia-l4t-faster-qwen3-tts" + nvidia-l4t-cuda-12: "nvidia-l4t-faster-qwen3-tts" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-faster-qwen3-tts" + icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png - &qwen-asr urls: - https://github.com/QwenLM/Qwen3-ASR @@ -2279,6 +2301,57 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen-tts" mirrors: - localai/localai-backends:master-metal-darwin-arm64-qwen-tts +## faster-qwen3-tts +- !!merge <<: *faster-qwen3-tts + name: "faster-qwen3-tts-development" + capabilities: + nvidia: "cuda12-faster-qwen3-tts-development" + default: "cuda12-faster-qwen3-tts-development" + nvidia-cuda-13: "cuda13-faster-qwen3-tts-development" + nvidia-cuda-12: "cuda12-faster-qwen3-tts-development" + nvidia-l4t: "nvidia-l4t-faster-qwen3-tts-development" + nvidia-l4t-cuda-12: "nvidia-l4t-faster-qwen3-tts-development" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-faster-qwen3-tts-development" +- !!merge <<: *faster-qwen3-tts + name: "cuda12-faster-qwen3-tts" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-faster-qwen3-tts" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-12-faster-qwen3-tts +- !!merge <<: *faster-qwen3-tts + name: "cuda12-faster-qwen3-tts-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-qwen3-tts" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-12-faster-qwen3-tts +- !!merge <<: *faster-qwen3-tts + name: "cuda13-faster-qwen3-tts" + uri: 
"quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-faster-qwen3-tts" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-faster-qwen3-tts +- !!merge <<: *faster-qwen3-tts + name: "cuda13-faster-qwen3-tts-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-faster-qwen3-tts" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-faster-qwen3-tts +- !!merge <<: *faster-qwen3-tts + name: "nvidia-l4t-faster-qwen3-tts" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-faster-qwen3-tts" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-faster-qwen3-tts +- !!merge <<: *faster-qwen3-tts + name: "nvidia-l4t-faster-qwen3-tts-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-faster-qwen3-tts" + mirrors: + - localai/localai-backends:master-nvidia-l4t-faster-qwen3-tts +- !!merge <<: *faster-qwen3-tts + name: "cuda13-nvidia-l4t-arm64-faster-qwen3-tts" + uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-faster-qwen3-tts" + mirrors: + - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-faster-qwen3-tts +- !!merge <<: *faster-qwen3-tts + name: "cuda13-nvidia-l4t-arm64-faster-qwen3-tts-development" + uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-faster-qwen3-tts" + mirrors: + - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-faster-qwen3-tts ## qwen-asr - !!merge <<: *qwen-asr name: "qwen-asr-development" diff --git a/backend/python/faster-qwen3-tts/Makefile b/backend/python/faster-qwen3-tts/Makefile new file mode 100644 index 000000000..a1f9a37df --- /dev/null +++ b/backend/python/faster-qwen3-tts/Makefile @@ -0,0 +1,23 @@ +.PHONY: faster-qwen3-tts +faster-qwen3-tts: + bash install.sh + +.PHONY: run +run: faster-qwen3-tts + @echo "Running faster-qwen3-tts..." + bash run.sh + @echo "faster-qwen3-tts run." + +.PHONY: test +test: faster-qwen3-tts + @echo "Testing faster-qwen3-tts..." 
+	bash test.sh
+	@echo "faster-qwen3-tts tested."
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
diff --git a/backend/python/faster-qwen3-tts/backend.py b/backend/python/faster-qwen3-tts/backend.py
new file mode 100644
index 000000000..d3bec3247
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/backend.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python3
+"""
+gRPC server of LocalAI for Faster Qwen3-TTS (CUDA graph capture, voice clone only).
+"""
+from concurrent import futures
+import time
+import argparse
+import signal
+import sys
+import os
+import traceback
+import backend_pb2
+import backend_pb2_grpc
+import torch
+import soundfile as sf
+
+import grpc
+
+
+def is_float(s):
+    """Return True if ``s`` can be parsed by ``float()``."""
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+
+def is_int(s):
+    """Return True if ``s`` can be parsed by ``int()``."""
+    try:
+        int(s)
+        return True
+    except ValueError:
+        return False
+
+
+def parse_option_value(value):
+    """Convert the value of a ``key:value`` option to int, float, bool or str."""
+    # int is tried before float: every integer literal is also a valid float,
+    # so the reverse order turns "4" into 4.0 and never reaches the int branch.
+    if is_int(value):
+        return int(value)
+    if is_float(value):
+        return float(value)
+    if value.lower() in ("true", "false"):
+        return value.lower() == "true"
+    return value
+
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """gRPC servicer exposing Faster Qwen3-TTS voice cloning to LocalAI."""
+
+    def Health(self, request, context):
+        """Liveness probe: always replies ``OK``."""
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """Parse ``request.Options`` and load the model named by ``request.Model``.
+
+        Returns a ``Result`` with ``success=False`` when CUDA is unavailable or
+        when importing or loading the model raises.
+        """
+        if not torch.cuda.is_available():
+            return backend_pb2.Result(
+                success=False,
+                message="faster-qwen3-tts requires NVIDIA GPU with CUDA"
+            )
+
+        self.options = {}
+        for opt in request.Options:
+            if ":" not in opt:
+                continue
+            key, value = opt.split(":", 1)
+            # ref_text is a transcript: keep it verbatim even when it looks
+            # like a number or a boolean.
+            self.options[key] = value if key == "ref_text" else parse_option_value(value)
+
+        model_path = request.Model or "Qwen/Qwen3-TTS-12Hz-0.6B-Base"
+        self.audio_path = request.AudioPath if hasattr(request, 'AudioPath') and request.AudioPath else None
+        self.model_file = request.ModelFile if hasattr(request, 'ModelFile') and request.ModelFile else None
+        self.model_path = request.ModelPath if hasattr(request, 'ModelPath') and request.ModelPath else None
+
+        print(f"Loading model from: {model_path}", file=sys.stderr)
+        try:
+            # Imported inside the try so a missing or broken package is
+            # reported as a failed Result instead of an unhandled gRPC error.
+            from faster_qwen3_tts import FasterQwen3TTS
+            self.model = FasterQwen3TTS.from_pretrained(model_path)
+        except Exception as e:
+            print(f"[ERROR] Loading model: {type(e).__name__}: {e}", file=sys.stderr)
+            print(traceback.format_exc(), file=sys.stderr)
+            return backend_pb2.Result(success=False, message=str(e))
+
+        print(f"Model loaded successfully: {model_path}", file=sys.stderr)
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def _get_ref_audio_path(self, request):
+        """Resolve the reference audio path stored by LoadModel.
+
+        Absolute paths are returned unchanged. Relative paths are looked up
+        next to ModelFile, then under ModelPath; if neither candidate exists
+        the stored value is returned as-is. ``request`` is unused.
+        """
+        if not self.audio_path:
+            return None
+        if os.path.isabs(self.audio_path):
+            return self.audio_path
+        if self.model_file:
+            model_file_base = os.path.dirname(self.model_file)
+            ref_path = os.path.join(model_file_base, self.audio_path)
+            if os.path.exists(ref_path):
+                return ref_path
+        if self.model_path:
+            ref_path = os.path.join(self.model_path, self.audio_path)
+            if os.path.exists(ref_path):
+                return ref_path
+        return self.audio_path
+
+    def TTS(self, request, context):
+        """Synthesize ``request.text`` in the cloned voice and write it to ``request.dst``.
+
+        Requires a prior successful LoadModel with AudioPath and a ``ref_text``
+        option. Every failure is returned as a ``Result`` with ``success=False``.
+        """
+        if getattr(self, "model", None) is None:
+            return backend_pb2.Result(
+                success=False,
+                message="Model not loaded; call LoadModel first"
+            )
+        try:
+            if not request.dst:
+                return backend_pb2.Result(
+                    success=False,
+                    message="dst (output path) is required"
+                )
+            text = request.text.strip()
+            if not text:
+                return backend_pb2.Result(
+                    success=False,
+                    message="Text is empty"
+                )
+
+            language = getattr(request, 'language', None) or "English"
+
+            ref_audio = self._get_ref_audio_path(request)
+            if not ref_audio:
+                return backend_pb2.Result(
+                    success=False,
+                    message="AudioPath is required for voice clone (set in LoadModel)"
+                )
+            ref_text = self.options.get("ref_text") or getattr(request, 'ref_text', None)
+            if not ref_text:
+                return backend_pb2.Result(
+                    success=False,
+                    message="ref_text is required for voice clone (set via LoadModel Options, e.g. ref_text:Your reference transcript)"
+                )
+
+            chunk_size = self.options.get("chunk_size")
+            generation_kwargs = {}
+            if chunk_size is not None:
+                generation_kwargs["chunk_size"] = int(chunk_size)
+
+            audio_list, sr = self.model.generate_voice_clone(
+                text=text,
+                language=language,
+                ref_audio=ref_audio,
+                ref_text=ref_text,
+                **generation_kwargs
+            )
+
+            if audio_list is None or (isinstance(audio_list, list) and len(audio_list) == 0):
+                return backend_pb2.Result(
+                    success=False,
+                    message="No audio output generated"
+                )
+            audio_data = audio_list[0] if isinstance(audio_list, list) else audio_list
+            sf.write(request.dst, audio_data, sr)
+            print(f"Saved output to {request.dst}", file=sys.stderr)
+
+        except Exception as err:
+            print(f"Error in TTS: {err}", file=sys.stderr)
+            print(traceback.format_exc(), file=sys.stderr)
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+        return backend_pb2.Result(success=True)
+
+
+def serve(address):
+    """Start the gRPC server on ``address`` and block until interrupted."""
+    server = grpc.server(
+        futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+        options=[
+            ('grpc.max_message_length', 50 * 1024 * 1024),
+            ('grpc.max_send_message_length', 50 * 1024 * 1024),
+            ('grpc.max_receive_message_length', 50 * 1024 * 1024),
+        ]
+    )
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("Server started. Listening on: " + address, file=sys.stderr)
+
+    def signal_handler(sig, frame):
+        print("Received termination signal. Shutting down...", file=sys.stderr)
+        server.stop(0)
+        sys.exit(0)
+
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        server.stop(0)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the gRPC server.")
+    parser.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
+    args = parser.parse_args()
+    serve(args.addr)
diff --git a/backend/python/faster-qwen3-tts/install.sh b/backend/python/faster-qwen3-tts/install.sh
new file mode 100644
index 000000000..b7d487873
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/install.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -e
+
+EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
+
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+installRequirements
diff --git a/backend/python/faster-qwen3-tts/requirements-cublas12.txt b/backend/python/faster-qwen3-tts/requirements-cublas12.txt
new file mode 100644
index 000000000..4ccb3f6f8
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/requirements-cublas12.txt
@@ -0,0 +1,4 @@
+--extra-index-url https://download.pytorch.org/whl/cu121
+torch
+torchaudio
+faster-qwen3-tts
diff --git a/backend/python/faster-qwen3-tts/requirements-cublas13.txt b/backend/python/faster-qwen3-tts/requirements-cublas13.txt
new file mode 100644
index 000000000..f802d5413
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/requirements-cublas13.txt
@@ -0,0 +1,4 @@
+--extra-index-url https://download.pytorch.org/whl/cu130
+torch
+torchaudio
+faster-qwen3-tts
diff --git a/backend/python/faster-qwen3-tts/requirements-install.txt b/backend/python/faster-qwen3-tts/requirements-install.txt
new file mode 100644
index 000000000..49fe098d9
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/requirements-install.txt
@@ -0,0 +1 @@
+setuptools
diff --git a/backend/python/faster-qwen3-tts/requirements-l4t12.txt b/backend/python/faster-qwen3-tts/requirements-l4t12.txt
new file mode 100644
index 000000000..078c26026
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/requirements-l4t12.txt
@@ -0,0 +1,4 @@
+--extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu129/
+torch
+torchaudio
+faster-qwen3-tts
diff --git a/backend/python/faster-qwen3-tts/requirements-l4t13.txt b/backend/python/faster-qwen3-tts/requirements-l4t13.txt
new file mode 100644
index 000000000..f802d5413
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/requirements-l4t13.txt
@@ -0,0 +1,4 @@
+--extra-index-url https://download.pytorch.org/whl/cu130
+torch
+torchaudio
+faster-qwen3-tts
diff --git a/backend/python/faster-qwen3-tts/requirements.txt b/backend/python/faster-qwen3-tts/requirements.txt
new file mode 100644
index 000000000..67dfc9d05
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/requirements.txt
@@ -0,0 +1,8 @@
+grpcio==1.71.0
+protobuf
+certifi
+packaging==24.1
+soundfile
+setuptools
+six
+sox
diff --git a/backend/python/faster-qwen3-tts/run.sh b/backend/python/faster-qwen3-tts/run.sh
new file mode 100644
index 000000000..eae121f37
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/run.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+startBackend $@
diff --git a/backend/python/faster-qwen3-tts/test.py b/backend/python/faster-qwen3-tts/test.py
new file mode 100644
index 000000000..f13af4bf9
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/test.py
@@ -0,0 +1,120 @@
+"""
+Tests for the faster-qwen3-tts gRPC backend.
+"""
+import unittest
+import subprocess
+import os
+import sys
+import tempfile
+import backend_pb2
+import backend_pb2_grpc
+import grpc
+import torch
+
+
+ADDRESS = "localhost:50052"
+# Upper bound, in seconds, for the backend to start accepting connections.
+STARTUP_TIMEOUT = 60.0
+CUDA_AVAILABLE = torch.cuda.is_available()
+
+
+class TestBackendServicer(unittest.TestCase):
+    """Start backend.py as a subprocess for each test and call it over gRPC."""
+
+    def setUp(self):
+        # Output is inherited instead of piped: nothing drains the pipes while
+        # the server runs, so verbose model-loading logs could fill them and
+        # block the backend.
+        self.service = subprocess.Popen(
+            ["python3", "backend.py", "--addr", ADDRESS],
+            cwd=os.path.dirname(os.path.abspath(__file__)),
+        )
+        # Registered before waiting so the process is stopped even if startup fails.
+        self.addCleanup(self._stop_service)
+        # Wait for the server to accept connections rather than sleeping a fixed time.
+        with grpc.insecure_channel(ADDRESS) as channel:
+            grpc.channel_ready_future(channel).result(timeout=STARTUP_TIMEOUT)
+
+    def _stop_service(self):
+        """Terminate the backend, killing it if it does not exit within 5 seconds."""
+        self.service.terminate()
+        try:
+            self.service.wait(timeout=5)
+        except subprocess.TimeoutExpired:
+            self.service.kill()
+            self.service.wait()
+
+    def test_health(self):
+        """Health must answer OK without any model loaded."""
+        with grpc.insecure_channel(ADDRESS) as channel:
+            stub = backend_pb2_grpc.BackendStub(channel)
+            reply = stub.Health(backend_pb2.HealthMessage(), timeout=5.0)
+            self.assertEqual(reply.message, b"OK")
+
+    # LoadModel only fails fast on hosts without CUDA; with a GPU it would
+    # start loading the real model, so the assertion below does not apply.
+    @unittest.skipIf(CUDA_AVAILABLE, "only meaningful on hosts without CUDA")
+    def test_load_model_requires_cuda(self):
+        """LoadModel must report failure when no CUDA device is present."""
+        with grpc.insecure_channel(ADDRESS) as channel:
+            stub = backend_pb2_grpc.BackendStub(channel)
+            response = stub.LoadModel(
+                backend_pb2.ModelOptions(
+                    Model="Qwen/Qwen3-TTS-12Hz-0.6B-Base",
+                    CUDA=True,
+                ),
+                timeout=10.0,
+            )
+            self.assertFalse(response.success)
+
+    @unittest.skipUnless(CUDA_AVAILABLE, "faster-qwen3-tts TTS requires CUDA")
+    def test_tts(self):
+        """Load the model with a silent reference clip and synthesize one sentence."""
+        import soundfile as sf
+        try:
+            with grpc.insecure_channel(ADDRESS) as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                ref_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+                ref_audio.close()
+                try:
+                    sr = 22050
+                    duration = 1.0
+                    samples = int(sr * duration)
+                    sf.write(ref_audio.name, [0.0] * samples, sr)
+
+                    response = stub.LoadModel(
+                        backend_pb2.ModelOptions(
+                            Model="Qwen/Qwen3-TTS-12Hz-0.6B-Base",
+                            AudioPath=ref_audio.name,
+                            Options=["ref_text:Hello world"],
+                        ),
+                        timeout=600.0,
+                    )
+                    self.assertTrue(response.success, response.message)
+
+                    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as out:
+                        output_path = out.name
+                    try:
+                        tts_response = stub.TTS(
+                            backend_pb2.TTSRequest(
+                                text="Test output.",
+                                dst=output_path,
+                                language="English",
+                            ),
+                            timeout=120.0,
+                        )
+                        self.assertTrue(tts_response.success, tts_response.message)
+                        self.assertTrue(os.path.exists(output_path))
+                        self.assertGreater(os.path.getsize(output_path), 0)
+                    finally:
+                        if os.path.exists(output_path):
+                            os.unlink(output_path)
+                finally:
+                    if os.path.exists(ref_audio.name):
+                        os.unlink(ref_audio.name)
+        except Exception as err:
+            self.fail(f"TTS test failed: {err}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/backend/python/faster-qwen3-tts/test.sh b/backend/python/faster-qwen3-tts/test.sh
new file mode 100644
index 000000000..eb59f2aaf
--- /dev/null
+++ b/backend/python/faster-qwen3-tts/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -e
+
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+runUnittests