From 7a0e6ae6d225deec1ccb6c5b1763747cb3694ec4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 11 Apr 2026 23:14:26 +0200 Subject: [PATCH] feat(qwen3tts.cpp): add new backend (#9316) Signed-off-by: Ettore Di Giacinto --- .github/workflows/backend.yml | 122 ++++++++++++ .github/workflows/bump_deps.yaml | 4 + .github/workflows/test-extra.yml | 33 ++++ Makefile | 6 +- README.md | 4 +- backend/go/qwen3-tts-cpp/CMakeLists.txt | 56 ++++++ backend/go/qwen3-tts-cpp/Makefile | 126 +++++++++++++ .../go/qwen3-tts-cpp/cpp/goqwen3ttscpp.cpp | 161 ++++++++++++++++ backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.h | 12 ++ backend/go/qwen3-tts-cpp/goqwen3ttscpp.go | 74 ++++++++ backend/go/qwen3-tts-cpp/main.go | 47 +++++ backend/go/qwen3-tts-cpp/package.sh | 64 +++++++ backend/go/qwen3-tts-cpp/qwen3ttscpp_test.go | 173 ++++++++++++++++++ backend/go/qwen3-tts-cpp/run.sh | 52 ++++++ backend/go/qwen3-tts-cpp/test.sh | 52 ++++++ backend/index.yaml | 125 +++++++++++++ gallery/index.yaml | 61 ++++++ 17 files changed, 1168 insertions(+), 4 deletions(-) create mode 100644 backend/go/qwen3-tts-cpp/CMakeLists.txt create mode 100644 backend/go/qwen3-tts-cpp/Makefile create mode 100644 backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.cpp create mode 100644 backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.h create mode 100644 backend/go/qwen3-tts-cpp/goqwen3ttscpp.go create mode 100644 backend/go/qwen3-tts-cpp/main.go create mode 100755 backend/go/qwen3-tts-cpp/package.sh create mode 100644 backend/go/qwen3-tts-cpp/qwen3ttscpp_test.go create mode 100755 backend/go/qwen3-tts-cpp/run.sh create mode 100755 backend/go/qwen3-tts-cpp/test.sh diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 410cb97ad..978b49121 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -613,6 +613,19 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "8" + 
platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-qwen3-tts-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "8" @@ -1225,6 +1238,19 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-qwen3-tts-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -1238,6 +1264,19 @@ jobs: backend: "acestep-cpp" dockerfile: "./backend/Dockerfile.golang" context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'false' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-qwen3-tts-cpp' + base-image: "ubuntu:24.04" + ubuntu-version: '2404' + runs-on: 'ubuntu-24.04-arm' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -2222,6 +2261,85 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + # qwen3-tts-cpp + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-cpu-qwen3-tts-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'sycl_f32' + cuda-major-version: "" 
+ cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-sycl-f32-qwen3-tts-cpp' + runs-on: 'ubuntu-latest' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-sycl-f16-qwen3-tts-cpp' + runs-on: 'ubuntu-latest' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'vulkan' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64,linux/arm64' + tag-latest: 'auto' + tag-suffix: '-gpu-vulkan-qwen3-tts-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'false' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-arm64-qwen3-tts-cpp' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2204' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-rocm-hipblas-qwen3-tts-cpp' + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + runs-on: 'ubuntu-latest' + skip-drivers: 'false' + backend: "qwen3-tts-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' # voxtral - build-type: '' cuda-major-version: "" @@ -2527,6 +2645,10 @@ jobs: tag-suffix: 
"-metal-darwin-arm64-acestep-cpp" build-type: "metal" lang: "go" + - backend: "qwen3-tts-cpp" + tag-suffix: "-metal-darwin-arm64-qwen3-tts-cpp" + build-type: "metal" + lang: "go" - backend: "voxtral" tag-suffix: "-metal-darwin-arm64-voxtral" build-type: "metal" diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 50bd56c69..211d3e4ab 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -38,6 +38,10 @@ jobs: variable: "SAM3_VERSION" branch: "main" file: "backend/go/sam3-cpp/Makefile" + - repository: "predict-woo/qwen3-tts.cpp" + variable: "QWEN3TTS_CPP_VERSION" + branch: "main" + file: "backend/go/qwen3-tts-cpp/Makefile" runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index a7396cf7a..0992dfdd9 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -30,6 +30,7 @@ jobs: voxcpm: ${{ steps.detect.outputs.voxcpm }} llama-cpp-quantization: ${{ steps.detect.outputs.llama-cpp-quantization }} acestep-cpp: ${{ steps.detect.outputs.acestep-cpp }} + qwen3-tts-cpp: ${{ steps.detect.outputs.qwen3-tts-cpp }} voxtral: ${{ steps.detect.outputs.voxtral }} kokoros: ${{ steps.detect.outputs.kokoros }} steps: @@ -496,6 +497,38 @@ jobs: - name: Test acestep-cpp run: | make --jobs=5 --output-sync=target -C backend/go/acestep-cpp test + tests-qwen3-tts-cpp: + needs: detect-changes + if: needs.detect-changes.outputs.qwen3-tts-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true' + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake curl libopenblas-dev ffmpeg + - name: Setup Go + uses: actions/setup-go@v5 + - name: Display Go version + run: go version + - name: Proto Dependencies + run: | + # Install protoc + curl -L -s 
https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + PATH="$PATH:$HOME/go/bin" make protogen-go + - name: Build qwen3-tts-cpp + run: | + make --jobs=5 --output-sync=target -C backend/go/qwen3-tts-cpp + - name: Test qwen3-tts-cpp + run: | + make --jobs=5 --output-sync=target -C backend/go/qwen3-tts-cpp test tests-voxtral: needs: detect-changes if: needs.detect-changes.outputs.voxtral == 'true' || needs.detect-changes.outputs.run-all == 'true' diff --git a/Makefile b/Makefile index 816d256fb..53457b531 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store 
backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp GOCMD=go GOTEST=$(GOCMD) test @@ -559,6 +559,7 @@ BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|tr BACKEND_WHISPER = whisper|golang|.|false|true BACKEND_VOXTRAL = voxtral|golang|.|false|true BACKEND_ACESTEP_CPP = acestep-cpp|golang|.|false|true +BACKEND_QWEN3_TTS_CPP = qwen3-tts-cpp|golang|.|false|true BACKEND_OPUS = opus|golang|.|false|true # Python backends with root context @@ -651,6 +652,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM))) $(eval $(call generate-docker-build-target,$(BACKEND_WHISPERX))) $(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP))) $(eval $(call generate-docker-build-target,$(BACKEND_ACESTEP_CPP))) +$(eval $(call generate-docker-build-target,$(BACKEND_QWEN3_TTS_CPP))) $(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED))) $(eval $(call generate-docker-build-target,$(BACKEND_TRL))) $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION))) @@ -661,7 +663,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_SAM3_CPP))) docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers 
docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros docker-build-sam3-cpp +docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros docker-build-sam3-cpp docker-build-qwen3-tts-cpp ######################################################## ### Mock Backend for E2E Tests diff --git a/README.md b/README.md index 0ef44d287..ed27c75f8 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ **LocalAI** is the open-source AI engine. Run any model - LLMs, vision, voice, image, video - on any hardware. No GPU required. - **Drop-in API compatibility** — OpenAI, Anthropic, ElevenLabs APIs -- **35+ backends** — llama.cpp, vLLM, transformers, whisper, diffusers, MLX... +- **36+ backends** — llama.cpp, vLLM, transformers, whisper, diffusers, MLX... 
- **Any hardware** — NVIDIA, AMD, Intel, Apple Silicon, Vulkan, or CPU-only - **Multi-user ready** — API key auth, user quotas, role-based access - **Built-in AI agents** — autonomous agents with tool use, RAG, MCP, and skills @@ -185,7 +185,7 @@ For older news and full release notes, see [GitHub Releases](https://github.com/ ## Supported Backends & Acceleration -LocalAI supports **35+ backends** including llama.cpp, vLLM, transformers, whisper.cpp, diffusers, MLX, MLX-VLM, and many more. Hardware acceleration is available for **NVIDIA** (CUDA 12/13), **AMD** (ROCm), **Intel** (oneAPI/SYCL), **Apple Silicon** (Metal), **Vulkan**, and **NVIDIA Jetson** (L4T). All backends can be installed on-the-fly from the [Backend Gallery](https://localai.io/backends/). +LocalAI supports **36+ backends** including llama.cpp, vLLM, transformers, whisper.cpp, diffusers, MLX, MLX-VLM, and many more. Hardware acceleration is available for **NVIDIA** (CUDA 12/13), **AMD** (ROCm), **Intel** (oneAPI/SYCL), **Apple Silicon** (Metal), **Vulkan**, and **NVIDIA Jetson** (L4T). All backends can be installed on-the-fly from the [Backend Gallery](https://localai.io/backends/). See the full [Backend & Model Compatibility Table](https://localai.io/model-compatibility/) and [GPU Acceleration guide](https://localai.io/features/gpu-acceleration/). diff --git a/backend/go/qwen3-tts-cpp/CMakeLists.txt b/backend/go/qwen3-tts-cpp/CMakeLists.txt new file mode 100644 index 000000000..abf10621b --- /dev/null +++ b/backend/go/qwen3-tts-cpp/CMakeLists.txt @@ -0,0 +1,56 @@ +cmake_minimum_required(VERSION 3.14) +project(goqwen3ttscpp LANGUAGES C CXX) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +set(QWEN3TTS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sources/qwen3-tts.cpp) + +# Override upstream's CMAKE_CUDA_ARCHITECTURES before add_subdirectory. 
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-real;89-real") +endif() + +# Build ggml from the upstream's submodule FIRST, so that ggml/ggml-base/ggml-cpu +# CMake targets exist when the upstream project references them by name. +# The upstream CMakeLists.txt uses target_link_libraries(... ggml ggml-base ggml-cpu) +# with target_link_directories pointing at a pre-built ggml/build/. By adding ggml +# as a subdirectory here, CMake resolves those names as targets instead. +add_subdirectory(${QWEN3TTS_DIR}/ggml ggml EXCLUDE_FROM_ALL) + +# Now add the upstream project +add_subdirectory(${QWEN3TTS_DIR} qwen3tts EXCLUDE_FROM_ALL) + +add_library(goqwen3ttscpp MODULE cpp/goqwen3ttscpp.cpp) +target_link_libraries(goqwen3ttscpp PRIVATE qwen3_tts) + +target_include_directories(goqwen3ttscpp PRIVATE ${QWEN3TTS_DIR}/src) +target_include_directories(goqwen3ttscpp SYSTEM PRIVATE ${QWEN3TTS_DIR}/ggml/include) + +# Link GPU backends if available +foreach(backend blas cuda metal vulkan) + if(TARGET ggml-${backend}) + target_link_libraries(goqwen3ttscpp PRIVATE ggml-${backend}) + string(TOUPPER ${backend} BACKEND_UPPER) + target_compile_definitions(goqwen3ttscpp PRIVATE QWEN3TTS_HAVE_${BACKEND_UPPER}) + if(backend STREQUAL "cuda") + find_package(CUDAToolkit QUIET) + if(CUDAToolkit_FOUND) + target_link_libraries(goqwen3ttscpp PRIVATE CUDA::cudart) + endif() + endif() + endif() +endforeach() + +if(MSVC) + target_compile_options(goqwen3ttscpp PRIVATE /W4 /wd4100 /wd4505) +else() + target_compile_options(goqwen3ttscpp PRIVATE -Wall -Wextra -Wshadow -Wconversion + -Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion) +endif() + +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) + target_link_libraries(goqwen3ttscpp PRIVATE stdc++fs) +endif() + +set_property(TARGET goqwen3ttscpp PROPERTY CXX_STANDARD 17) +set_target_properties(goqwen3ttscpp PROPERTIES LIBRARY_OUTPUT_DIRECTORY 
${CMAKE_BINARY_DIR}) diff --git a/backend/go/qwen3-tts-cpp/Makefile b/backend/go/qwen3-tts-cpp/Makefile new file mode 100644 index 000000000..fe83c3fe2 --- /dev/null +++ b/backend/go/qwen3-tts-cpp/Makefile @@ -0,0 +1,126 @@ +CMAKE_ARGS?= +BUILD_TYPE?= +NATIVE?=false + +GOCMD?=go +GO_TAGS?= +JOBS?=$(shell nproc --ignore=1) + +# qwen3-tts.cpp version +QWEN3TTS_REPO?=https://github.com/predict-woo/qwen3-tts.cpp +QWEN3TTS_CPP_VERSION?=7a762e2ad4bacc6fdda81d81bf10a09ffb546f29 +SO_TARGET?=libgoqwen3ttscpp.so + +CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF + +ifeq ($(NATIVE),false) + CMAKE_ARGS+=-DGGML_NATIVE=OFF +endif + +ifeq ($(BUILD_TYPE),cublas) + CMAKE_ARGS+=-DGGML_CUDA=ON +else ifeq ($(BUILD_TYPE),openblas) + CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +else ifeq ($(BUILD_TYPE),clblas) + CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path +else ifeq ($(BUILD_TYPE),hipblas) + CMAKE_ARGS+=-DGGML_HIPBLAS=ON +else ifeq ($(BUILD_TYPE),vulkan) + CMAKE_ARGS+=-DGGML_VULKAN=ON +else ifeq ($(OS),Darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DGGML_METAL=OFF + else + CMAKE_ARGS+=-DGGML_METAL=ON + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON + endif +endif + +ifeq ($(BUILD_TYPE),sycl_f16) + CMAKE_ARGS+=-DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx \ + -DGGML_SYCL_F16=ON +endif + +ifeq ($(BUILD_TYPE),sycl_f32) + CMAKE_ARGS+=-DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx +endif + +sources/qwen3-tts.cpp: + mkdir -p sources/qwen3-tts.cpp + cd sources/qwen3-tts.cpp && \ + git init && \ + git remote add origin $(QWEN3TTS_REPO) && \ + git fetch origin && \ + git checkout $(QWEN3TTS_CPP_VERSION) && \ + git submodule update --init --recursive --depth 1 --single-branch + +# Detect OS +UNAME_S := $(shell uname -s) + +# Only build CPU variants on Linux +ifeq ($(UNAME_S),Linux) + VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so +else + # On non-Linux (e.g., 
Darwin), build only fallback variant + VARIANT_TARGETS = libgoqwen3ttscpp-fallback.so +endif + +qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS) + CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o qwen3-tts-cpp ./ + +package: qwen3-tts-cpp + bash package.sh + +build: package + +clean: purge + rm -rf libgoqwen3ttscpp*.so package sources/qwen3-tts.cpp qwen3-tts-cpp + +purge: + rm -rf build* + +# Variants must build sequentially +.NOTPARALLEL: + +# Build all variants (Linux only) +ifeq ($(UNAME_S),Linux) +libgoqwen3ttscpp-avx.so: sources/qwen3-tts.cpp + $(info ${GREEN}I qwen3-tts-cpp build info:avx${RESET}) + SO_TARGET=libgoqwen3ttscpp-avx.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom + rm -rf build-libgoqwen3ttscpp-avx.so + +libgoqwen3ttscpp-avx2.so: sources/qwen3-tts.cpp + $(info ${GREEN}I qwen3-tts-cpp build info:avx2${RESET}) + SO_TARGET=libgoqwen3ttscpp-avx2.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgoqwen3ttscpp-custom + rm -rf build-libgoqwen3ttscpp-avx2.so + +libgoqwen3ttscpp-avx512.so: sources/qwen3-tts.cpp + $(info ${GREEN}I qwen3-tts-cpp build info:avx512${RESET}) + SO_TARGET=libgoqwen3ttscpp-avx512.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgoqwen3ttscpp-custom + rm -rf build-libgoqwen3ttscpp-avx512.so +endif + +# Build fallback variant (all platforms) +libgoqwen3ttscpp-fallback.so: sources/qwen3-tts.cpp + $(info ${GREEN}I qwen3-tts-cpp build info:fallback${RESET}) + SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom + rm -rf build-libgoqwen3ttscpp-fallback.so + +libgoqwen3ttscpp-custom: CMakeLists.txt 
cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h + mkdir -p build-$(SO_TARGET) && \ + cd build-$(SO_TARGET) && \ + cmake .. $(CMAKE_ARGS) && \ + cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \ + cd .. && \ + mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET) + +test: qwen3-tts-cpp + @echo "Running qwen3-tts-cpp tests..." + bash test.sh + @echo "qwen3-tts-cpp tests completed." + +all: qwen3-tts-cpp package diff --git a/backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.cpp b/backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.cpp new file mode 100644 index 000000000..b2de53620 --- /dev/null +++ b/backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.cpp @@ -0,0 +1,161 @@ +#include "goqwen3ttscpp.h" +#include "ggml-backend.h" +#include "qwen3_tts.h" + +#include +#include +#include +#include +#include + +using namespace qwen3_tts; + +// Global engine (loaded once, reused across requests) +static Qwen3TTS *g_engine = nullptr; +static bool g_loaded = false; +static int g_threads = 4; + +static void ggml_log_cb(enum ggml_log_level level, const char *log, void *data) { + const char *level_str; + if (!log) + return; + switch (level) { + case GGML_LOG_LEVEL_DEBUG: + level_str = "DEBUG"; + break; + case GGML_LOG_LEVEL_INFO: + level_str = "INFO"; + break; + case GGML_LOG_LEVEL_WARN: + level_str = "WARN"; + break; + case GGML_LOG_LEVEL_ERROR: + level_str = "ERROR"; + break; + default: + level_str = "?????"; + break; + } + fprintf(stderr, "[%-5s] ", level_str); + fputs(log, stderr); + fflush(stderr); +} + +// Map language string to language_id token used by the model +static int language_to_id(const char *lang) { + if (!lang || lang[0] == '\0') + return 2050; // default: English + std::string l(lang); + if (l == "en") + return 2050; + if (l == "ru") + return 2069; + if (l == "zh") + return 2055; + if (l == "ja") + return 2058; + if (l == "ko") + return 2064; + if (l == "de") + return 2053; + if (l == "fr") + return 2061; + if (l == "es") + return 2054; + if (l == "it") + return 2056; 
+ if (l == "pt") + return 2057; + fprintf(stderr, "[qwen3-tts-cpp] Unknown language '%s', defaulting to English\n", + lang); + return 2050; +} + +int load_model(const char *model_dir, int n_threads) { + ggml_log_set(ggml_log_cb, nullptr); + ggml_backend_load_all(); + + if (n_threads <= 0) + n_threads = 4; + g_threads = n_threads; + + fprintf(stderr, "[qwen3-tts-cpp] Loading models from %s (threads=%d)\n", + model_dir, n_threads); + + g_engine = new Qwen3TTS(); + if (!g_engine->load_models(model_dir)) { + fprintf(stderr, "[qwen3-tts-cpp] FATAL: failed to load models from %s\n", + model_dir); + delete g_engine; + g_engine = nullptr; + return 1; + } + + g_loaded = true; + fprintf(stderr, "[qwen3-tts-cpp] Models loaded successfully\n"); + return 0; +} + +int synthesize(const char *text, const char *ref_audio_path, const char *dst, + const char *language, float temperature, float top_p, + int top_k, float repetition_penalty, int max_audio_tokens, + int n_threads) { + if (!g_loaded || !g_engine) { + fprintf(stderr, "[qwen3-tts-cpp] ERROR: models not loaded\n"); + return 1; + } + + if (!text || !dst) { + fprintf(stderr, "[qwen3-tts-cpp] ERROR: text and dst are required\n"); + return 2; + } + + tts_params params; + params.max_audio_tokens = max_audio_tokens > 0 ? max_audio_tokens : 4096; + params.temperature = temperature; + params.top_p = top_p; + params.top_k = top_k; + params.repetition_penalty = repetition_penalty; + params.n_threads = n_threads > 0 ? n_threads : g_threads; + params.language_id = language_to_id(language); + + fprintf(stderr, "[qwen3-tts-cpp] Synthesizing: text='%.50s%s', lang_id=%d, " + "temp=%.2f, threads=%d\n", + text, (strlen(text) > 50 ? "..." 
: ""), params.language_id, + temperature, params.n_threads); + + tts_result result; + bool has_ref = ref_audio_path && ref_audio_path[0] != '\0'; + + if (has_ref) { + fprintf(stderr, "[qwen3-tts-cpp] Voice cloning with ref: %s\n", + ref_audio_path); + result = g_engine->synthesize_with_voice(text, ref_audio_path, params); + } else { + result = g_engine->synthesize(text, params); + } + + if (!result.success) { + fprintf(stderr, "[qwen3-tts-cpp] ERROR: synthesis failed: %s\n", + result.error_msg.c_str()); + return 3; + } + + int n_samples = (int)result.audio.size(); + if (n_samples == 0) { + fprintf(stderr, "[qwen3-tts-cpp] ERROR: synthesis produced no samples\n"); + return 4; + } + + fprintf(stderr, + "[qwen3-tts-cpp] Synthesis done: %d samples (%.2fs @ 24kHz)\n", + n_samples, (float)n_samples / 24000.0f); + + if (!save_audio_file(dst, result.audio, result.sample_rate)) { + fprintf(stderr, "[qwen3-tts-cpp] ERROR: failed to write %s\n", dst); + return 5; + } + + fprintf(stderr, "[qwen3-tts-cpp] Wrote %s\n", dst); + return 0; +} diff --git a/backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.h b/backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.h new file mode 100644 index 000000000..51b1c216d --- /dev/null +++ b/backend/go/qwen3-tts-cpp/cpp/goqwen3ttscpp.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +extern "C" { +int load_model(const char *model_dir, int n_threads); +int synthesize(const char *text, const char *ref_audio_path, const char *dst, + const char *language, float temperature, float top_p, + int top_k, float repetition_penalty, int max_audio_tokens, + int n_threads); +} diff --git a/backend/go/qwen3-tts-cpp/goqwen3ttscpp.go b/backend/go/qwen3-tts-cpp/goqwen3ttscpp.go new file mode 100644 index 000000000..484e06053 --- /dev/null +++ b/backend/go/qwen3-tts-cpp/goqwen3ttscpp.go @@ -0,0 +1,74 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" +) + +var ( + 
CppLoadModel func(modelDir string, nThreads int) int + CppSynthesize func(text, refAudioPath, dst, language string, + temperature, topP float32, topK int, + repetitionPenalty float32, maxAudioTokens, nThreads int) int +) + +type Qwen3TtsCpp struct { + base.SingleThread + threads int +} + +func (q *Qwen3TtsCpp) Load(opts *pb.ModelOptions) error { + // ModelFile is the model directory path (containing GGUF files) + modelDir := opts.ModelFile + if modelDir == "" { + modelDir = opts.ModelPath + } + + // Resolve relative paths + if !filepath.IsAbs(modelDir) && opts.ModelPath != "" { + modelDir = filepath.Join(opts.ModelPath, modelDir) + } + + threads := int(opts.Threads) + if threads <= 0 { + threads = 4 + } + q.threads = threads + + fmt.Fprintf(os.Stderr, "[qwen3-tts-cpp] Loading models from: %s (threads=%d)\n", modelDir, threads) + + if ret := CppLoadModel(modelDir, threads); ret != 0 { + return fmt.Errorf("failed to load qwen3-tts model (error code: %d)", ret) + } + + return nil +} + +func (q *Qwen3TtsCpp) TTS(req *pb.TTSRequest) error { + text := req.Text + voice := req.Voice // reference audio path for voice cloning (empty = no cloning) + dst := req.Dst + language := "" + if req.Language != nil { + language = *req.Language + } + + // Synthesis parameters with sensible defaults + temperature := float32(0.9) + topP := float32(0.8) + topK := 50 + repetitionPenalty := float32(1.05) + maxAudioTokens := 4096 + + if ret := CppSynthesize(text, voice, dst, language, + temperature, topP, topK, repetitionPenalty, + maxAudioTokens, q.threads); ret != 0 { + return fmt.Errorf("failed to synthesize audio (error code: %d)", ret) + } + + return nil +} diff --git a/backend/go/qwen3-tts-cpp/main.go b/backend/go/qwen3-tts-cpp/main.go new file mode 100644 index 000000000..d10239ccc --- /dev/null +++ b/backend/go/qwen3-tts-cpp/main.go @@ -0,0 +1,47 @@ +package main + +// Note: this is started internally by LocalAI and a server is allocated for each model +import ( + "flag" + "os" + + 
"github.com/ebitengine/purego" + grpc "github.com/mudler/LocalAI/pkg/grpc" +) + +var ( + addr = flag.String("addr", "localhost:50051", "the address to connect to") +) + +type LibFuncs struct { + FuncPtr any + Name string +} + +func main() { + // Get library name from environment variable, default to fallback + libName := os.Getenv("QWEN3TTS_LIBRARY") + if libName == "" { + libName = "./libgoqwen3ttscpp-fallback.so" + } + + gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) + if err != nil { + panic(err) + } + + libFuncs := []LibFuncs{ + {&CppLoadModel, "load_model"}, + {&CppSynthesize, "synthesize"}, + } + + for _, lf := range libFuncs { + purego.RegisterLibFunc(lf.FuncPtr, gosd, lf.Name) + } + + flag.Parse() + + if err := grpc.StartServer(*addr, &Qwen3TtsCpp{}); err != nil { + panic(err) + } +} diff --git a/backend/go/qwen3-tts-cpp/package.sh b/backend/go/qwen3-tts-cpp/package.sh new file mode 100755 index 000000000..bb73df968 --- /dev/null +++ b/backend/go/qwen3-tts-cpp/package.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +# Script to copy the appropriate libraries based on architecture +# This script is used in the final stage of the Dockerfile + +set -e + +CURDIR=$(dirname "$(realpath $0)") +REPO_ROOT="${CURDIR}/../../.." + +# Create lib directory +mkdir -p $CURDIR/package/lib + +cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/ +cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/ +cp -fv $CURDIR/run.sh $CURDIR/package/ + +# Detect architecture and copy appropriate libraries +if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then + # x86_64 architecture + echo "Detected x86_64 architecture, copying x86_64 libraries..." 
+ cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so + cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 + cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then + # ARM64 architecture + echo "Detected ARM64 architecture, copying ARM64 libraries..." + cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so + cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 + cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +elif [ $(uname -s) = "Darwin" ]; then + echo "Detected Darwin" +else + echo "Error: Could not detect architecture" + exit 1 
+fi
+
+# Package GPU libraries based on BUILD_TYPE
+GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
+if [ -f "$GPU_LIB_SCRIPT" ]; then
+    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
+    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
+    package_gpu_libs
+fi
+
+echo "Packaging completed successfully"
+ls -liah "$CURDIR/package/"
+ls -liah "$CURDIR/package/lib/"
diff --git a/backend/go/qwen3-tts-cpp/qwen3ttscpp_test.go b/backend/go/qwen3-tts-cpp/qwen3ttscpp_test.go
new file mode 100644
index 000000000..8e6b38610
--- /dev/null
+++ b/backend/go/qwen3-tts-cpp/qwen3ttscpp_test.go
@@ -0,0 +1,210 @@
+package main
+
+import (
+	"context"
+	"net"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"testing"
+	"time"
+
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+)
+
+const (
+	// testAddr is the address the backend under test is told to listen on.
+	testAddr = "localhost:50051"
+	// startupWait bounds how long startServer waits for the backend to accept
+	// connections.
+	startupWait = 5 * time.Second
+	// startupPoll is the pause between two readiness probes in startServer.
+	startupPoll = 100 * time.Millisecond
+)
+
+// skipIfNoModel returns the directory named by QWEN3TTS_MODEL_DIR. The calling
+// test is skipped when the variable is unset or when either of the two GGUF
+// files checked below is missing from that directory.
+func skipIfNoModel(t *testing.T) string {
+	t.Helper()
+	modelDir := os.Getenv("QWEN3TTS_MODEL_DIR")
+	if modelDir == "" {
+		t.Skip("QWEN3TTS_MODEL_DIR not set, skipping test (set to directory with GGUF models)")
+	}
+	if _, err := os.Stat(filepath.Join(modelDir, "qwen3-tts-0.6b-f16.gguf")); os.IsNotExist(err) {
+		t.Skipf("TTS model file not found in %s, skipping", modelDir)
+	}
+	if _, err := os.Stat(filepath.Join(modelDir, "qwen3-tts-tokenizer-f16.gguf")); os.IsNotExist(err) {
+		t.Skipf("Tokenizer model file not found in %s, skipping", modelDir)
+	}
+	return modelDir
+}
+
+// startServer launches the backend binary (QWEN3TTS_BINARY, defaulting to
+// ./qwen3-tts-cpp) with --addr testAddr and waits, for at most startupWait,
+// until that address accepts TCP connections. The calling test is skipped when
+// the binary does not exist. Callers stop the process with stopServer.
+func startServer(t *testing.T) *exec.Cmd {
+	t.Helper()
+	binary := os.Getenv("QWEN3TTS_BINARY")
+	if binary == "" {
+		binary = "./qwen3-tts-cpp"
+	}
+	if _, err := os.Stat(binary); os.IsNotExist(err) {
+		t.Skipf("Backend binary not found at %s, skipping", binary)
+	}
+	cmd := exec.Command(binary, "--addr", testAddr)
+	cmd.Stdout = os.Stderr
+	cmd.Stderr = os.Stderr
+	if err := cmd.Start(); err != nil {
+		t.Fatalf("Failed to start server: %v", err)
+	}
+	// Probe the port rather than always sleeping for the whole startupWait.
+	// When the deadline passes the command is returned anyway, so a backend
+	// that never came up is still reported by the test's first RPC.
+	deadline := time.Now().Add(startupWait)
+	for time.Now().Before(deadline) {
+		probe, err := net.DialTimeout("tcp", testAddr, startupPoll)
+		if err == nil {
+			probe.Close()
+			break
+		}
+		time.Sleep(startupPoll)
+	}
+	return cmd
+}
+
+// stopServer kills the process started by startServer and waits for it to
+// exit. Errors from Kill and Wait are not checked. A nil cmd, or one whose
+// process was never started, is a no-op.
+func stopServer(cmd *exec.Cmd) {
+	if cmd != nil && cmd.Process != nil {
+		cmd.Process.Kill()
+		cmd.Wait()
+	}
+}
+
+// dialGRPC opens a plaintext (insecure credentials) client connection to
+// testAddr with the per-call send and receive limits raised to 50 MiB. The
+// caller closes the returned connection.
+func dialGRPC(t *testing.T) *grpc.ClientConn {
+	t.Helper()
+	conn, err := grpc.Dial(testAddr,
+		grpc.WithTransportCredentials(insecure.NewCredentials()),
+		grpc.WithDefaultCallOptions(
+			grpc.MaxCallRecvMsgSize(50*1024*1024),
+			grpc.MaxCallSendMsgSize(50*1024*1024),
+		),
+	)
+	if err != nil {
+		t.Fatalf("Failed to dial gRPC: %v", err)
+	}
+	return conn
+}
+
+// TestServerHealth starts the backend and expects the Health RPC to answer
+// with the message "OK". It needs the binary but no model files.
+func TestServerHealth(t *testing.T) {
+	cmd := startServer(t)
+	defer stopServer(cmd)
+
+	conn := dialGRPC(t)
+	defer conn.Close()
+
+	client := pb.NewBackendClient(conn)
+	resp, err := client.Health(context.Background(), &pb.HealthMessage{})
+	if err != nil {
+		t.Fatalf("Health check failed: %v", err)
+	}
+	if string(resp.Message) != "OK" {
+		t.Fatalf("Expected OK, got %s", string(resp.Message))
+	}
+}
+
+// TestLoadModel starts the backend and expects LoadModel, pointed at the model
+// directory with 4 threads, to report success.
+func TestLoadModel(t *testing.T) {
+	modelDir := skipIfNoModel(t)
+	cmd := startServer(t)
+	defer stopServer(cmd)
+
+	conn := dialGRPC(t)
+	defer conn.Close()
+
+	client := pb.NewBackendClient(conn)
+
+	resp, err := client.LoadModel(context.Background(), &pb.ModelOptions{
+		ModelFile: modelDir,
+		Threads:   4,
+	})
+	if err != nil {
+		t.Fatalf("LoadModel failed: %v", err)
+	}
+	if !resp.Success {
+		t.Fatalf("LoadModel returned failure: %s", resp.Message)
+	}
+}
+
+// TestTTS loads the model, synthesizes one English sentence into output.wav in
+// a temporary directory and checks that the file exists and holds at least
+// 1000 bytes.
+func TestTTS(t *testing.T) {
+	modelDir := skipIfNoModel(t)
+
+	tmpDir, err := os.MkdirTemp("", "qwen3tts-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { os.RemoveAll(tmpDir) })
+
+	outputFile := filepath.Join(tmpDir, "output.wav")
+
+	cmd := startServer(t)
+	defer stopServer(cmd)
+
+	conn := dialGRPC(t)
+	defer conn.Close()
+
+	client := pb.NewBackendClient(conn)
+
+	// Load models
+	loadResp, err := client.LoadModel(context.Background(), &pb.ModelOptions{
+		ModelFile: modelDir,
+		Threads:   4,
+	})
+	if err != nil {
+		t.Fatalf("LoadModel failed: %v", err)
+	}
+	if !loadResp.Success {
+		t.Fatalf("LoadModel returned failure: %s", loadResp.Message)
+	}
+
+	// Synthesize speech
+	language := "en"
+	_, err = client.TTS(context.Background(), &pb.TTSRequest{
+		Text:     "Hello, this is a test of the Qwen3 text to speech system.",
+		Dst:      outputFile,
+		Language: &language,
+	})
+	if err != nil {
+		t.Fatalf("TTS failed: %v", err)
+	}
+
+	// Verify output file exists and has content
+	info, err := os.Stat(outputFile)
+	if os.IsNotExist(err) {
+		t.Fatal("Output audio file was not created")
+	}
+	if err != nil {
+		t.Fatalf("Failed to stat output file: %v", err)
+	}
+
+	t.Logf("Output file size: %d bytes", info.Size())
+
+	// WAV header is 44 bytes minimum; any real audio should be much larger
+	if info.Size() < 1000 {
+		t.Errorf("Output file too small (%d bytes), expected real audio data", info.Size())
+	}
+}
diff --git a/backend/go/qwen3-tts-cpp/run.sh b/backend/go/qwen3-tts-cpp/run.sh
new file mode 100755
index 000000000..6416779fa
--- /dev/null
+++ b/backend/go/qwen3-tts-cpp/run.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Launcher for the qwen3-tts-cpp backend: picks the shared library variant
+# matching the host CPU, then starts the backend binary.
+set -ex
+
+# Get the absolute current dir where the script is located
+CURDIR=$(dirname "$(realpath "$0")")
+
+cd /
+
+echo "CPU info:"
+if [ "$(uname)" != "Darwin" ]; then
+    grep -e "model\sname" /proc/cpuinfo | head -1
+    grep -e "flags" /proc/cpuinfo | head -1
+fi
+
+LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
+
+# Later checks overwrite earlier ones, so the last variant that is both
+# supported by the CPU and present on disk is the one that gets used.
+if [ "$(uname)" != "Darwin" ]; then
+    if grep -q -e "\savx\s" /proc/cpuinfo ; then
+        echo "CPU: AVX found OK"
+        if [ -e "$CURDIR/libgoqwen3ttscpp-avx.so" ]; then
+            LIBRARY="$CURDIR/libgoqwen3ttscpp-avx.so"
+        fi
+    fi
+
+    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
+        echo "CPU: AVX2 found OK"
+        if [ -e "$CURDIR/libgoqwen3ttscpp-avx2.so" ]; then
+            LIBRARY="$CURDIR/libgoqwen3ttscpp-avx2.so"
+        fi
+    fi
+
+    # Check avx 512
+    if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
+        echo "CPU: AVX512F found OK"
+        if [ -e "$CURDIR/libgoqwen3ttscpp-avx512.so" ]; then
+            LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so"
+        fi
+    fi
+fi
+
+# Append the previous LD_LIBRARY_PATH only when it is non-empty: a trailing ':'
+# would add an empty entry, which the loader reads as the current directory.
+export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+export QWEN3TTS_LIBRARY="$LIBRARY"
+
+# If there is a lib/ld.so, use it
+if [ -f "$CURDIR/lib/ld.so" ]; then
+    echo "Using lib/ld.so"
+    echo "Using library: $LIBRARY"
+    exec "$CURDIR/lib/ld.so" "$CURDIR/qwen3-tts-cpp" "$@"
+fi
+
+echo "Using library: $LIBRARY"
+exec "$CURDIR/qwen3-tts-cpp" "$@"
diff --git a/backend/go/qwen3-tts-cpp/test.sh b/backend/go/qwen3-tts-cpp/test.sh
new file mode 100755
index 000000000..aaebfc42a
--- /dev/null
+++ b/backend/go/qwen3-tts-cpp/test.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+echo "Running qwen3-tts-cpp backend tests..."
+
+# The test requires:
+# - QWEN3TTS_MODEL_DIR: path to directory containing GGUF model files
+# - QWEN3TTS_BINARY: path to the qwen3-tts-cpp binary (defaults to ./qwen3-tts-cpp)
+#
+# Tests that require the model will be skipped if QWEN3TTS_MODEL_DIR is not set
+# or the directory does not contain the required model files.
+
+cd "$CURDIR"
+
+# Only auto-download models when QWEN3TTS_MODEL_DIR is not explicitly set
+if [ -z "$QWEN3TTS_MODEL_DIR" ]; then
+    export QWEN3TTS_MODEL_DIR="./qwen3-tts-models"
+
+    if [ ! -d "$QWEN3TTS_MODEL_DIR" ]; then
+        echo "Creating qwen3-tts-models directory for tests..."
+        mkdir -p "$QWEN3TTS_MODEL_DIR"
+    fi
+
+    REPO_ID="endo5501/qwen3-tts.cpp"
+    echo "Repository: ${REPO_ID}"
+    echo ""
+
+    # Files to download (smallest model for testing)
+    FILES=(
+        "qwen3-tts-0.6b-f16.gguf"
+        "qwen3-tts-tokenizer-f16.gguf"
+    )
+
+    BASE_URL="https://huggingface.co/${REPO_ID}/resolve/main"
+
+    # Checked file by file on every run, so a download that was interrupted
+    # after the directory had been created is completed on the next run.
+    for file in "${FILES[@]}"; do
+        dest="${QWEN3TTS_MODEL_DIR}/${file}"
+        if [ -f "${dest}" ]; then
+            echo " [skip] ${file} (already exists)"
+        else
+            echo " [download] ${file}..."
+            # --fail: abort on HTTP errors instead of saving the error page as
+            # a model. The .part name keeps partial downloads from being
+            # mistaken for complete files.
+            curl --fail -L -o "${dest}.part" "${BASE_URL}/${file}" --progress-bar
+            mv "${dest}.part" "${dest}"
+            echo " [done] ${file}"
+        fi
+    done
+fi
+
+# Run Go tests
+go test -v -timeout 600s .
+
+echo "All qwen3-tts-cpp tests passed."
diff --git a/backend/index.yaml b/backend/index.yaml
index 6c482c12f..1546c1af3 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -412,6 +412,30 @@
     nvidia-l4t: "nvidia-l4t-arm64-acestep-cpp"
     nvidia-l4t-cuda-12: "nvidia-l4t-arm64-acestep-cpp"
     nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-acestep-cpp"
+- &qwen3ttscpp
+  name: "qwen3-tts-cpp"
+  description: |
+    Qwen3-TTS C++ backend using GGML. Native C++ text-to-speech with voice cloning support.
+    Generates 24kHz mono audio from text with optional reference audio for voice cloning via ECAPA-TDNN speaker embeddings.
+  urls:
+    - https://github.com/predict-woo/qwen3-tts.cpp
+  tags:
+    - text-to-speech
+    - tts
+    - voice-cloning
+  alias: "qwen3-tts-cpp"
+  capabilities:
+    default: "cpu-qwen3-tts-cpp"
+    nvidia: "cuda12-qwen3-tts-cpp"  # same entry as nvidia-cuda-12
+    nvidia-cuda-13: "cuda13-qwen3-tts-cpp"
+    nvidia-cuda-12: "cuda12-qwen3-tts-cpp"
+    intel: "intel-sycl-f16-qwen3-tts-cpp"  # the f32 build is listed below but is not mapped to a capability
+    metal: "metal-qwen3-tts-cpp"
+    amd: "rocm-qwen3-tts-cpp"
+    vulkan: "vulkan-qwen3-tts-cpp"
+    nvidia-l4t: "nvidia-l4t-arm64-qwen3-tts-cpp"
+    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-qwen3-tts-cpp"  # same entry as nvidia-l4t
+    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-qwen3-tts-cpp"
 - &faster-whisper
   icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
   description: |
@@ -2107,6 +2131,107 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-acestep-cpp"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-acestep-cpp
+## qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "nvidia-l4t-arm64-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-arm64-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "nvidia-l4t-arm64-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-nvidia-l4t-arm64-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "cuda13-nvidia-l4t-arm64-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "cuda13-nvidia-l4t-arm64-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "cpu-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-cpu-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "metal-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "metal-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "cpu-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-cpu-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "cuda12-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "rocm-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-rocm-hipblas-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "intel-sycl-f32-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-intel-sycl-f32-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "intel-sycl-f16-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-intel-sycl-f16-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "vulkan-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-vulkan-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "vulkan-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-vulkan-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "cuda12-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-cuda-12-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "rocm-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-rocm-hipblas-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "intel-sycl-f32-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-intel-sycl-f32-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "intel-sycl-f16-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-intel-sycl-f16-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "cuda13-qwen3-tts-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-13-qwen3-tts-cpp
+- !!merge <<: *qwen3ttscpp
+  name: "cuda13-qwen3-tts-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-qwen3-tts-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-cuda-13-qwen3-tts-cpp
 ## kokoro
 - !!merge <<: *kokoro
   name: "kokoro-development"
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 1a29b93cf..bc407b933 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -994,6 +994,67 @@
     - filename: acestep-cpp/vae-BF16.gguf
       uri: huggingface://Serveurperso/ACE-Step-1.5-GGUF/vae-BF16.gguf
       sha256: 0599862ac5d15cd308e1d2e368373aea6c02e25ebd1737ad4a4562a0901b0ef8
+- name: "qwen3-tts-cpp"
+  license: apache-2.0
+  tags:
+    - tts
+    - text-to-speech
+    - qwen3-tts
+    - qwen3-tts-cpp
+    - gguf
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/endo5501/qwen3-tts.cpp
+    - https://github.com/predict-woo/qwen3-tts.cpp
+  description: |
+    Qwen3-TTS 0.6B (C++ / GGML) — native C++ text-to-speech from text input.
+    Generates 24kHz mono audio. Supports 10 languages (en, zh, ja, ko, de, fr, es, it, pt, ru).
+    Uses F16 GGUF models (~2 GB total).
+  overrides:
+    name: qwen3-tts-cpp
+    backend: qwen3-tts-cpp
+    parameters:
+      model: qwen3-tts-cpp  # matches the directory prefix of the files below
+    known_usecases:
+      - tts
+  files:
+    - filename: qwen3-tts-cpp/qwen3-tts-0.6b-f16.gguf
+      uri: huggingface://endo5501/qwen3-tts.cpp/qwen3-tts-0.6b-f16.gguf
+      sha256: 0b89770118463af8f2467d824a8de57d96df6a09f927a9769a3f7b7fffa7087d
+    - filename: qwen3-tts-cpp/qwen3-tts-tokenizer-f16.gguf
+      uri: huggingface://endo5501/qwen3-tts.cpp/qwen3-tts-tokenizer-f16.gguf
+      sha256: d1ad9660bd99343f4851d5a4b17e31f65648feb3559f6ea062ae6575e5cd9d90
+- name: "qwen3-tts-cpp-customvoice"
+  license: apache-2.0
+  tags:
+    - tts
+    - text-to-speech
+    - voice-cloning
+    - qwen3-tts
+    - qwen3-tts-cpp
+    - gguf
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/endo5501/qwen3-tts.cpp
+    - https://github.com/predict-woo/qwen3-tts.cpp
+  description: |
+    Qwen3-TTS 0.6B Custom Voice (C++ / GGML) — text-to-speech with voice cloning support.
+    Generates 24kHz mono audio with optional reference audio for voice cloning via ECAPA-TDNN speaker embeddings.
+    Supports 10 languages (en, zh, ja, ko, de, fr, es, it, pt, ru).
+  overrides:
+    name: qwen3-tts-cpp-customvoice
+    backend: qwen3-tts-cpp
+    parameters:
+      model: qwen3-tts-cpp-customvoice
+    known_usecases:
+      - tts
+  files:
+    - filename: qwen3-tts-cpp-customvoice/qwen3-tts-0.6b-customvoice-f16.gguf
+      uri: huggingface://endo5501/qwen3-tts.cpp/qwen3-tts-0.6b-customvoice-f16.gguf
+      sha256: 40b985b71be0970d41eb042488766db556cf17290aa1cff631cabfa0bd3b0431
+    - filename: qwen3-tts-cpp-customvoice/qwen3-tts-tokenizer-f16.gguf  # same uri and sha256 as the qwen3-tts-cpp tokenizer
+      uri: huggingface://endo5501/qwen3-tts.cpp/qwen3-tts-tokenizer-f16.gguf
+      sha256: d1ad9660bd99343f4851d5a4b17e31f65648feb3559f6ea062ae6575e5cd9d90
 - name: "qwen3-coder-next-mxfp4_moe"
   url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
   urls: