diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 22449a3b3..7b4e7f871 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -300,6 +300,19 @@ jobs:
            dockerfile: "./backend/Dockerfile.python"
            context: "./"
            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "8"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-12-outetts'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "outetts"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "8"
@@ -1562,6 +1575,19 @@ jobs:
            dockerfile: "./backend/Dockerfile.python"
            context: "./"
            ubuntu-version: '2404'
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-cpu-outetts'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'true'
+            backend: "outetts"
+            dockerfile: "./backend/Dockerfile.python"
+            context: "./"
+            ubuntu-version: '2404'
   backend-jobs-darwin:
     uses: ./.github/workflows/backend_build_darwin.yml
     strategy:
@@ -1590,6 +1616,64 @@ jobs:
            tag-suffix: "-metal-darwin-arm64-whisper"
            build-type: "metal"
            lang: "go"
+          - backend: "vibevoice"
+            tag-suffix: "-metal-darwin-arm64-vibevoice"
+            build-type: "mps"
+          - backend: "qwen-asr"
+            tag-suffix: "-metal-darwin-arm64-qwen-asr"
+            build-type: "mps"
+          - backend: "qwen-tts"
+            tag-suffix: "-metal-darwin-arm64-qwen-tts"
+            build-type: "mps"
+          - backend: "voxcpm"
+            tag-suffix: "-metal-darwin-arm64-voxcpm"
+            build-type: "mps"
+          - backend: "pocket-tts"
+            tag-suffix: "-metal-darwin-arm64-pocket-tts"
+            build-type: "mps"
+          - backend: "moonshine"
+            tag-suffix: "-metal-darwin-arm64-moonshine"
+            build-type: "mps"
+          - backend: "whisperx"
+            tag-suffix: "-metal-darwin-arm64-whisperx"
+            build-type: "mps"
+          - backend: "rerankers"
+            tag-suffix: "-metal-darwin-arm64-rerankers"
+            build-type: "mps"
+          - backend: "transformers"
+            tag-suffix: "-metal-darwin-arm64-transformers"
+            build-type: "mps"
+          - backend: "kokoro"
+            tag-suffix: "-metal-darwin-arm64-kokoro"
+            build-type: "mps"
+          - backend: "faster-whisper"
+            tag-suffix: "-metal-darwin-arm64-faster-whisper"
+            build-type: "mps"
+          - backend: "coqui"
+            tag-suffix: "-metal-darwin-arm64-coqui"
+            build-type: "mps"
+          - backend: "rfdetr"
+            tag-suffix: "-metal-darwin-arm64-rfdetr"
+            build-type: "mps"
+          - backend: "kitten-tts"
+            tag-suffix: "-metal-darwin-arm64-kitten-tts"
+            build-type: "mps"
+          - backend: "piper"
+            tag-suffix: "-metal-darwin-arm64-piper"
+            build-type: "metal"
+            lang: "go"
+          - backend: "silero-vad"
+            tag-suffix: "-metal-darwin-arm64-silero-vad"
+            build-type: "metal"
+            lang: "go"
+          - backend: "local-store"
+            tag-suffix: "-metal-darwin-arm64-local-store"
+            build-type: "metal"
+            lang: "go"
+          - backend: "huggingface"
+            tag-suffix: "-metal-darwin-arm64-huggingface"
+            build-type: "metal"
+            lang: "go"
     with:
       backend: ${{ matrix.backend }}
       build-type: ${{ matrix.build-type }}
diff --git a/Makefile b/Makefile
index 2fc43331b..dba46f84f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Disable parallel execution for backend builds
-.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm backends/whisperx
+.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm backends/whisperx
 GOCMD=go
 GOTEST=$(GOCMD) test
@@ -308,6 +308,7 @@ protogen-go-clean:
 prepare-test-extra: protogen-python
 	$(MAKE) -C backend/python/transformers
+	$(MAKE) -C backend/python/outetts
 	$(MAKE) -C backend/python/diffusers
 	$(MAKE) -C backend/python/chatterbox
 	$(MAKE) -C backend/python/vllm
@@ -322,6 +323,7 @@ prepare-test-extra: protogen-python
 test-extra: prepare-test-extra
 	$(MAKE) -C backend/python/transformers test
+	$(MAKE) -C backend/python/outetts test
 	$(MAKE) -C backend/python/diffusers test
 	$(MAKE) -C backend/python/chatterbox test
 	$(MAKE) -C backend/python/vllm test
@@ -451,6 +453,7 @@ BACKEND_WHISPER = whisper|golang|.|false|true
 # Python backends with root context
 BACKEND_RERANKERS = rerankers|python|.|false|true
 BACKEND_TRANSFORMERS = transformers|python|.|false|true
+BACKEND_OUTETTS = outetts|python|.|false|true
 BACKEND_FASTER_WHISPER = faster-whisper|python|.|false|true
 BACKEND_COQUI = coqui|python|.|false|true
 BACKEND_RFDETR = rfdetr|python|.|false|true
@@ -499,6 +502,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
 $(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
 $(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
 $(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
+$(eval $(call generate-docker-build-target,$(BACKEND_OUTETTS)))
 $(eval $(call generate-docker-build-target,$(BACKEND_FASTER_WHISPER)))
 $(eval $(call generate-docker-build-target,$(BACKEND_COQUI)))
 $(eval $(call generate-docker-build-target,$(BACKEND_RFDETR)))
@@ -521,7 +525,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_WHISPERX)))
 docker-save-%: backend-images
 	docker save local-ai-backend:$* -o backend-images/$*.tar
-docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm docker-build-whisperx
+docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm docker-build-whisperx
 ########################################################
 ### Mock Backend for E2E Tests
diff --git a/backend/index.yaml b/backend/index.yaml
index c0d314577..d56cee31f 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -105,6 +105,7 @@
     intel: "intel-rfdetr"
     #amd: "rocm-rfdetr"
     nvidia-l4t: "nvidia-l4t-arm64-rfdetr"
+    metal: "metal-rfdetr"
     default: "cpu-rfdetr"
     nvidia-cuda-13: "cuda13-rfdetr"
     nvidia-cuda-12: "cuda12-rfdetr"
@@ -223,6 +224,7 @@
     nvidia: "cuda12-rerankers"
     intel: "intel-rerankers"
     amd: "rocm-rerankers"
+    metal: "metal-rerankers"
 - &transformers
   name: "transformers"
   icon: https://avatars.githubusercontent.com/u/25720743?s=200&v=4
@@ -240,6 +242,7 @@
     nvidia: "cuda12-transformers"
     intel: "intel-transformers"
     amd: "rocm-transformers"
+    metal: "metal-transformers"
     nvidia-cuda-13: "cuda13-transformers"
     nvidia-cuda-12: "cuda12-transformers"
 - &diffusers
@@ -282,6 +285,7 @@
     nvidia: "cuda12-faster-whisper"
     intel: "intel-faster-whisper"
     amd: "rocm-faster-whisper"
+    metal: "metal-faster-whisper"
     nvidia-cuda-13: "cuda13-faster-whisper"
     nvidia-cuda-12: "cuda12-faster-whisper"
 - &moonshine
@@ -299,6 +303,7 @@
   alias: "moonshine"
   capabilities:
     nvidia: "cuda12-moonshine"
+    metal: "metal-moonshine"
     default: "cpu-moonshine"
     nvidia-cuda-13: "cuda13-moonshine"
     nvidia-cuda-12: "cuda12-moonshine"
@@ -318,6 +323,7 @@
   capabilities:
     nvidia: "cuda12-whisperx"
     amd: "rocm-whisperx"
+    metal: "metal-whisperx"
     default: "cpu-whisperx"
     nvidia-cuda-13: "cuda13-whisperx"
     nvidia-cuda-12: "cuda12-whisperx"
@@ -340,6 +346,7 @@
     intel: "intel-kokoro"
     amd: "rocm-kokoro"
     nvidia-l4t: "nvidia-l4t-kokoro"
+    metal: "metal-kokoro"
     nvidia-cuda-13: "cuda13-kokoro"
     nvidia-cuda-12: "cuda12-kokoro"
     nvidia-l4t-cuda-12: "nvidia-l4t-arm64-kokoro"
@@ -364,9 +371,25 @@
     nvidia: "cuda12-coqui"
     intel: "intel-coqui"
     amd: "rocm-coqui"
+    metal: "metal-coqui"
     nvidia-cuda-13: "cuda13-coqui"
     nvidia-cuda-12: "cuda12-coqui"
   icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
+- &outetts
+  urls:
+    - https://github.com/OuteAI/outetts
+  description: |
+    OuteTTS is an open-weight text-to-speech model from OuteAI (default model: OuteAI/OuteTTS-0.3-1B).
+    Supports custom speaker voices created from a reference audio file, as well as a set of default speakers.
+  tags:
+    - text-to-speech
+    - TTS
+  license: apache-2.0
+  name: "outetts"
+  alias: "outetts"
+  capabilities:
+    default: "cpu-outetts"
+    nvidia-cuda-12: "cuda12-outetts"
 - &chatterbox
   urls:
     - https://github.com/resemble-ai/chatterbox
@@ -405,6 +428,7 @@
     intel: "intel-vibevoice"
     amd: "rocm-vibevoice"
     nvidia-l4t: "nvidia-l4t-vibevoice"
+    metal: "metal-vibevoice"
     default: "cpu-vibevoice"
     nvidia-cuda-13: "cuda13-vibevoice"
     nvidia-cuda-12: "cuda12-vibevoice"
@@ -427,6 +451,7 @@
     intel: "intel-qwen-tts"
     amd: "rocm-qwen-tts"
     nvidia-l4t: "nvidia-l4t-qwen-tts"
+    metal: "metal-qwen-tts"
     default: "cpu-qwen-tts"
     nvidia-cuda-13: "cuda13-qwen-tts"
     nvidia-cuda-12: "cuda12-qwen-tts"
@@ -449,6 +474,7 @@
     intel: "intel-qwen-asr"
     amd: "rocm-qwen-asr"
     nvidia-l4t: "nvidia-l4t-qwen-asr"
+    metal: "metal-qwen-asr"
     default: "cpu-qwen-asr"
     nvidia-cuda-13: "cuda13-qwen-asr"
     nvidia-cuda-12: "cuda12-qwen-asr"
@@ -470,6 +496,7 @@
     nvidia: "cuda12-voxcpm"
     intel: "intel-voxcpm"
     amd: "rocm-voxcpm"
+    metal: "metal-voxcpm"
     default: "cpu-voxcpm"
     nvidia-cuda-13: "cuda13-voxcpm"
     nvidia-cuda-12: "cuda12-voxcpm"
@@ -490,6 +517,7 @@
     intel: "intel-pocket-tts"
     amd: "rocm-pocket-tts"
     nvidia-l4t: "nvidia-l4t-pocket-tts"
+    metal: "metal-pocket-tts"
     default: "cpu-pocket-tts"
     nvidia-cuda-13: "cuda13-pocket-tts"
     nvidia-cuda-12: "cuda12-pocket-tts"
@@ -655,26 +683,76 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-kitten-tts"
   mirrors:
     - localai/localai-backends:master-kitten-tts
+- !!merge <<: *kitten-tts
+  name: "metal-kitten-tts"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-kitten-tts"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-kitten-tts
+- !!merge <<: *kitten-tts
+  name: "metal-kitten-tts-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kitten-tts"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-kitten-tts
 - !!merge <<: *huggingface
   name: "huggingface-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
   mirrors:
     - localai/localai-backends:master-huggingface
+- !!merge <<: *huggingface
+  name: "metal-huggingface"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-huggingface"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-huggingface
+- !!merge <<: *huggingface
+  name: "metal-huggingface-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-huggingface"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-huggingface
 - !!merge <<: *local-store
   name: "local-store-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
   mirrors:
     - localai/localai-backends:master-cpu-local-store
+- !!merge <<: *local-store
+  name: "metal-local-store"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-local-store"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-local-store
+- !!merge <<: *local-store
+  name: "metal-local-store-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-local-store"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-local-store
 - !!merge <<: *silero-vad
   name: "silero-vad-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
   mirrors:
     - localai/localai-backends:master-cpu-silero-vad
+- !!merge <<: *silero-vad
+  name: "metal-silero-vad"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-silero-vad"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-silero-vad
+- !!merge <<: *silero-vad
+  name: "metal-silero-vad-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-silero-vad"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-silero-vad
 - !!merge <<: *piper
   name: "piper-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-piper"
   mirrors:
     - localai/localai-backends:master-piper
+- !!merge <<: *piper
+  name: "metal-piper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-piper"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-piper
+- !!merge <<: *piper
+  name: "metal-piper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-piper"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-piper
 ## llama-cpp
 - !!merge <<: *llamacpp
   name: "nvidia-l4t-arm64-llama-cpp"
@@ -1048,6 +1126,7 @@
     intel: "intel-rfdetr-development"
     #amd: "rocm-rfdetr-development"
     nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development"
+    metal: "metal-rfdetr-development"
     default: "cpu-rfdetr-development"
     nvidia-cuda-13: "cuda13-rfdetr-development"
 - !!merge <<: *rfdetr
@@ -1115,6 +1194,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rfdetr"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-rfdetr
+- !!merge <<: *rfdetr
+  name: "metal-rfdetr"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-rfdetr"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-rfdetr
+- !!merge <<: *rfdetr
+  name: "metal-rfdetr-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rfdetr"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-rfdetr
 ## Rerankers
 - !!merge <<: *rerankers
   name: "rerankers-development"
@@ -1122,6 +1211,7 @@
     nvidia: "cuda12-rerankers-development"
     intel: "intel-rerankers-development"
     amd: "rocm-rerankers-development"
+    metal: "metal-rerankers-development"
     nvidia-cuda-13: "cuda13-rerankers-development"
 - !!merge <<: *rerankers
   name: "cuda12-rerankers"
@@ -1163,6 +1253,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rerankers"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-rerankers
+- !!merge <<: *rerankers
+  name: "metal-rerankers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-rerankers"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-rerankers
+- !!merge <<: *rerankers
+  name: "metal-rerankers-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rerankers"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-rerankers
 ## Transformers
 - !!merge <<: *transformers
   name: "transformers-development"
@@ -1170,6 +1270,7 @@
     nvidia: "cuda12-transformers-development"
     intel: "intel-transformers-development"
     amd: "rocm-transformers-development"
+    metal: "metal-transformers-development"
     nvidia-cuda-13: "cuda13-transformers-development"
 - !!merge <<: *transformers
   name: "cuda12-transformers"
@@ -1211,6 +1312,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-transformers"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-transformers
+- !!merge <<: *transformers
+  name: "metal-transformers"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-transformers"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-transformers
+- !!merge <<: *transformers
+  name: "metal-transformers-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-transformers"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-transformers
 ## Diffusers
 - !!merge <<: *diffusers
   name: "diffusers-development"
@@ -1310,6 +1421,7 @@
     intel: "intel-kokoro-development"
     amd: "rocm-kokoro-development"
     nvidia-l4t: "nvidia-l4t-kokoro-development"
+    metal: "metal-kokoro-development"
 - !!merge <<: *kokoro
   name: "cuda12-kokoro-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-12-kokoro
@@ -1360,6 +1472,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-kokoro"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-kokoro
+- !!merge <<: *kokoro
+  name: "metal-kokoro"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-kokoro"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-kokoro
+- !!merge <<: *kokoro
+  name: "metal-kokoro-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kokoro"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-kokoro
 ## faster-whisper
 - !!merge <<: *faster-whisper
   name: "faster-whisper-development"
@@ -1367,6 +1489,7 @@
     nvidia: "cuda12-faster-whisper-development"
     intel: "intel-faster-whisper-development"
     amd: "rocm-faster-whisper-development"
+    metal: "metal-faster-whisper-development"
     nvidia-cuda-13: "cuda13-faster-whisper-development"
 - !!merge <<: *faster-whisper
   name: "cuda12-faster-whisper-development"
@@ -1398,6 +1521,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-faster-whisper"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-faster-whisper
+- !!merge <<: *faster-whisper
+  name: "metal-faster-whisper"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-faster-whisper"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-faster-whisper
+- !!merge <<: *faster-whisper
+  name: "metal-faster-whisper-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-faster-whisper"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-faster-whisper
 ## moonshine
 - !!merge <<: *moonshine
   name: "moonshine-development"
@@ -1436,12 +1569,23 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-moonshine"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-moonshine
+- !!merge <<: *moonshine
+  name: "metal-moonshine"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-moonshine"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-moonshine
+- !!merge <<: *moonshine
+  name: "metal-moonshine-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-moonshine"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-moonshine
 ## whisperx
 - !!merge <<: *whisperx
   name: "whisperx-development"
   capabilities:
     nvidia: "cuda12-whisperx-development"
     amd: "rocm-whisperx-development"
+    metal: "metal-whisperx-development"
     default: "cpu-whisperx-development"
     nvidia-cuda-13: "cuda13-whisperx-development"
     nvidia-cuda-12: "cuda12-whisperx-development"
@@ -1485,6 +1629,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-whisperx"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-whisperx
+- !!merge <<: *whisperx
+  name: "metal-whisperx"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisperx"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-whisperx
+- !!merge <<: *whisperx
+  name: "metal-whisperx-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisperx"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-whisperx
 ## coqui
 - !!merge <<: *coqui
@@ -1493,6 +1647,7 @@
     nvidia: "cuda12-coqui-development"
     intel: "intel-coqui-development"
     amd: "rocm-coqui-development"
+    metal: "metal-coqui-development"
 - !!merge <<: *coqui
   name: "cuda12-coqui"
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui"
   mirrors:
     - localai/localai-backends:latest-gpu-nvidia-cuda-12-coqui
@@ -1523,6 +1678,42 @@
   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui"
   mirrors:
     - localai/localai-backends:latest-gpu-rocm-hipblas-coqui
+- !!merge <<: *coqui
+  name: "metal-coqui"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-coqui"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-coqui
+- !!merge <<: *coqui
+  name: "metal-coqui-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-coqui"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-coqui
+## outetts
+- !!merge <<: *outetts
+  name: "outetts-development"
+  capabilities:
+    default: "cpu-outetts-development"
+    nvidia-cuda-12: "cuda12-outetts-development"
+- !!merge <<: *outetts
+  name: "cpu-outetts"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-outetts"
+  mirrors:
+    - localai/localai-backends:latest-cpu-outetts
+- !!merge <<: *outetts
+  name: "cpu-outetts-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-outetts"
+  mirrors:
+    - localai/localai-backends:master-cpu-outetts
+- !!merge <<: *outetts
+  name: "cuda12-outetts"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-outetts"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-outetts
+- !!merge <<: *outetts
+  name: "cuda12-outetts-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-outetts"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-cuda-12-outetts
 ## chatterbox
 - !!merge <<: *chatterbox
   name: "chatterbox-development"
@@ -1603,6 +1794,7 @@
     intel: "intel-vibevoice-development"
     amd: "rocm-vibevoice-development"
     nvidia-l4t: "nvidia-l4t-vibevoice-development"
+    metal: "metal-vibevoice-development"
     default: "cpu-vibevoice-development"
     nvidia-cuda-13: "cuda13-vibevoice-development"
     nvidia-cuda-12: "cuda12-vibevoice-development"
@@ -1678,6 +1870,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice"
   mirrors:
     - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice
+- !!merge <<: *vibevoice
+  name: "metal-vibevoice"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-vibevoice"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-vibevoice
+- !!merge <<: *vibevoice
+  name: "metal-vibevoice-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-vibevoice"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-vibevoice
 ## qwen-tts
 - !!merge <<: *qwen-tts
   name: "qwen-tts-development"
@@ -1686,6 +1888,7 @@
     intel: "intel-qwen-tts-development"
     amd: "rocm-qwen-tts-development"
     nvidia-l4t: "nvidia-l4t-qwen-tts-development"
+    metal: "metal-qwen-tts-development"
     default: "cpu-qwen-tts-development"
     nvidia-cuda-13: "cuda13-qwen-tts-development"
     nvidia-cuda-12: "cuda12-qwen-tts-development"
@@ -1761,6 +1964,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-tts"
   mirrors:
     - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-tts
+- !!merge <<: *qwen-tts
+  name: "metal-qwen-tts"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-qwen-tts"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-qwen-tts
+- !!merge <<: *qwen-tts
+  name: "metal-qwen-tts-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen-tts"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-qwen-tts
 ## qwen-asr
 - !!merge <<: *qwen-asr
   name: "qwen-asr-development"
@@ -1769,6 +1982,7 @@
     intel: "intel-qwen-asr-development"
     amd: "rocm-qwen-asr-development"
     nvidia-l4t: "nvidia-l4t-qwen-asr-development"
+    metal: "metal-qwen-asr-development"
     default: "cpu-qwen-asr-development"
     nvidia-cuda-13: "cuda13-qwen-asr-development"
     nvidia-cuda-12: "cuda12-qwen-asr-development"
@@ -1844,6 +2058,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-asr"
   mirrors:
     - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-asr
+- !!merge <<: *qwen-asr
+  name: "metal-qwen-asr"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-qwen-asr"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-qwen-asr
+- !!merge <<: *qwen-asr
+  name: "metal-qwen-asr-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen-asr"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-qwen-asr
 ## voxcpm
 - !!merge <<: *voxcpm
   name: "voxcpm-development"
@@ -1851,6 +2075,7 @@
     nvidia: "cuda12-voxcpm-development"
     intel: "intel-voxcpm-development"
     amd: "rocm-voxcpm-development"
+    metal: "metal-voxcpm-development"
     default: "cpu-voxcpm-development"
     nvidia-cuda-13: "cuda13-voxcpm-development"
     nvidia-cuda-12: "cuda12-voxcpm-development"
@@ -1904,6 +2129,16 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-voxcpm"
   mirrors:
     - localai/localai-backends:master-gpu-rocm-hipblas-voxcpm
+- !!merge <<: *voxcpm
+  name: "metal-voxcpm"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-voxcpm"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-voxcpm
+- !!merge <<: *voxcpm
+  name: "metal-voxcpm-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-voxcpm"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-voxcpm
 ## pocket-tts
 - !!merge <<: *pocket-tts
   name: "pocket-tts-development"
@@ -1912,6 +2147,7 @@
     intel: "intel-pocket-tts-development"
     amd: "rocm-pocket-tts-development"
     nvidia-l4t: "nvidia-l4t-pocket-tts-development"
+    metal: "metal-pocket-tts-development"
     default: "cpu-pocket-tts-development"
     nvidia-cuda-13: "cuda13-pocket-tts-development"
     nvidia-cuda-12: "cuda12-pocket-tts-development"
@@ -1987,3 +2223,13 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-pocket-tts"
   mirrors:
     - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-pocket-tts
+- !!merge <<: *pocket-tts
+  name: "metal-pocket-tts"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-pocket-tts"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-pocket-tts
+- !!merge <<: *pocket-tts
+  name: "metal-pocket-tts-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-pocket-tts"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-pocket-tts
diff --git a/backend/python/chatterbox/requirements-mps.txt b/backend/python/chatterbox/requirements-mps.txt
new file mode 100644
index 000000000..620ce159f
--- /dev/null
+++ b/backend/python/chatterbox/requirements-mps.txt
@@ -0,0 +1,7 @@
+torch
+torchaudio
+accelerate
+numpy>=1.24.0,<1.26.0
+transformers
+# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
+chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
\ No newline at end of file
diff --git a/backend/python/coqui/requirements-mps.txt b/backend/python/coqui/requirements-mps.txt
new file mode 100644
index 000000000..f2883e977
--- /dev/null
+++ b/backend/python/coqui/requirements-mps.txt
@@ -0,0 +1,4 @@
+torch==2.7.1
+transformers==4.48.3
+accelerate
+coqui-tts
diff --git a/backend/python/faster-whisper/requirements-mps.txt b/backend/python/faster-whisper/requirements-mps.txt
new file mode 100644
index 000000000..fc1ea5601
--- /dev/null
+++ b/backend/python/faster-whisper/requirements-mps.txt
@@ -0,0 +1,8 @@
+torch==2.7.1
+faster-whisper
+opencv-python
+accelerate
+compel
+peft
+sentencepiece
+optimum-quanto
diff --git a/backend/python/kitten-tts/requirements-mps.txt b/backend/python/kitten-tts/requirements-mps.txt
new file mode 100644
index 000000000..e70441dce
--- /dev/null
+++ b/backend/python/kitten-tts/requirements-mps.txt
@@ -0,0 +1,5 @@
+grpcio==1.71.0
+protobuf
+certifi
+packaging==24.1
+https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
diff --git a/backend/python/kokoro/requirements-mps.txt b/backend/python/kokoro/requirements-mps.txt
new file mode 100644
index 000000000..c365eb29c
--- /dev/null
+++ b/backend/python/kokoro/requirements-mps.txt
@@ -0,0 +1,5 @@
+torch==2.7.1
+transformers
+accelerate
+kokoro
+soundfile
diff --git a/backend/python/moonshine/requirements-mps.txt b/backend/python/moonshine/requirements-mps.txt
new file mode 100644
index 000000000..79bfb3c72
--- /dev/null
+++ b/backend/python/moonshine/requirements-mps.txt
@@ -0,0 +1,4 @@
+grpcio==1.71.0
+protobuf
+grpcio-tools
+useful-moonshine-onnx@git+https://git@github.com/moonshine-ai/moonshine.git#subdirectory=moonshine-onnx
diff --git a/backend/python/outetts/Makefile b/backend/python/outetts/Makefile
new file mode 100644
index 000000000..6d3504d11
--- /dev/null
+++ b/backend/python/outetts/Makefile
@@ -0,0 +1,23 @@
+.PHONY: outetts
+outetts:
+	bash install.sh
+
+.PHONY: run
+run: outetts
+	@echo "Running outetts..."
+	bash run.sh
+	@echo "outetts run."
+
+.PHONY: test
+test: outetts
+	@echo "Testing outetts..."
+	bash test.sh
+	@echo "outetts tested."
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
diff --git a/backend/python/outetts/backend.py b/backend/python/outetts/backend.py
new file mode 100644
index 000000000..d98cc59e8
--- /dev/null
+++ b/backend/python/outetts/backend.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""
+gRPC server for OuteTTS (OuteAI TTS) models.
+"""
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+import asyncio
+
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+import outetts
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    def Health(self, request, context):
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        model_name = request.Model
+        if os.path.exists(request.ModelFile):
+            model_name = request.ModelFile
+
+        # Parse "key:value" options into a dict, coercing numeric values
+        self.options = {}
+        for opt in request.Options:
+            if ":" not in opt:
+                continue
+            key, value = opt.split(":", 1)
+            try:
+                if "." in value:
+                    value = float(value)
+                else:
+                    value = int(value)
+            except ValueError:
+                pass
+            self.options[key] = value
+
+        MODELNAME = "OuteAI/OuteTTS-0.3-1B"
+        TOKENIZER = "OuteAI/OuteTTS-0.3-1B"
+        VERSION = "0.3"
+        SPEAKER = "en_male_1"
+        # Scan every option; elif keeps scanning so one match does not shadow the others
+        for opt in request.Options:
+            if opt.startswith("tokenizer:"):
+                TOKENIZER = opt.split(":", 1)[1]
+            elif opt.startswith("version:"):
+                VERSION = opt.split(":", 1)[1]
+            elif opt.startswith("speaker:"):
+                SPEAKER = opt.split(":", 1)[1]
+
+        if model_name != "":
+            MODELNAME = model_name
+
+        try:
+            model_config = outetts.HFModelConfig_v2(
+                model_path=MODELNAME,
+                tokenizer_path=TOKENIZER
+            )
+            self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config)
+
+            self.interface.print_default_speakers()
+            if request.AudioPath:
+                if os.path.isabs(request.AudioPath):
+                    self.AudioPath = request.AudioPath
+                else:
+                    self.AudioPath = os.path.join(request.ModelPath, request.AudioPath)
+                self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
+            else:
+                self.speaker = self.interface.load_default_speaker(name=SPEAKER)
+
+            if request.ContextSize > 0:
+                self.max_tokens = request.ContextSize
+            else:
+                self.max_tokens = self.options.get("max_new_tokens", 512)
+
+        except Exception as err:
+            print("Error:", err, file=sys.stderr)
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def TTS(self, request, context):
+        try:
+            text = request.text if request.text else "Speech synthesis is the artificial production of human speech."
+            print("[OuteTTS] generating TTS", file=sys.stderr)
+            gen_cfg = outetts.GenerationConfig(
+                text=text,
+                temperature=self.options.get("temperature", 0.1),
+                repetition_penalty=self.options.get("repetition_penalty", 1.1),
+                max_length=self.max_tokens,
+                speaker=self.speaker,
+            )
+            output = self.interface.generate(config=gen_cfg)
+            print("[OuteTTS] Generated TTS", file=sys.stderr)
+            output.save(request.dst)
+            print("[OuteTTS] TTS done", file=sys.stderr)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(success=True)
+
+
+async def serve(address):
+    server = grpc.aio.server(
+        migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+        options=[
+            ('grpc.max_message_length', 50 * 1024 * 1024),
+            ('grpc.max_send_message_length', 50 * 1024 * 1024),
+            ('grpc.max_receive_message_length', 50 * 1024 * 1024),
+        ])
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+
+    loop = asyncio.get_event_loop()
+    for sig in (signal.SIGINT, signal.SIGTERM):
+        loop.add_signal_handler(
+            sig, lambda: asyncio.ensure_future(server.stop(5))
+        )
+
+    await server.start()
+    print("Server started. Listening on: " + address, file=sys.stderr)
+    await server.wait_for_termination()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the OuteTTS gRPC server.")
+    parser.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
+    args = parser.parse_args()
+    asyncio.run(serve(args.addr))
diff --git a/backend/python/outetts/install.sh b/backend/python/outetts/install.sh
new file mode 100644
index 000000000..4136d8765
--- /dev/null
+++ b/backend/python/outetts/install.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -e
+
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+installRequirements
diff --git a/backend/python/outetts/requirements-cpu.txt b/backend/python/outetts/requirements-cpu.txt
new file mode 100644
index 000000000..569fdaf87
--- /dev/null
+++ b/backend/python/outetts/requirements-cpu.txt
@@ -0,0 +1,7 @@
+torch==2.7.1
+llvmlite==0.43.0
+numba==0.60.0
+accelerate
+bitsandbytes
+outetts
+protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/outetts/requirements-cublas12.txt b/backend/python/outetts/requirements-cublas12.txt
new file mode 100644
index 000000000..31683be0a
--- /dev/null
+++ b/backend/python/outetts/requirements-cublas12.txt
@@ -0,0 +1,7 @@
+torch==2.7.1
+accelerate
+llvmlite==0.43.0
+numba==0.60.0
+bitsandbytes
+protobuf==6.33.5
+outetts
\ No newline at end of file
diff --git a/backend/python/outetts/requirements-cublas13.txt b/backend/python/outetts/requirements-cublas13.txt
new file mode 100644
index 000000000..c1aea423c
--- /dev/null
+++ b/backend/python/outetts/requirements-cublas13.txt
@@ -0,0 +1,7 @@
+--extra-index-url https://download.pytorch.org/whl/cu130
+torch==2.9.0
+llvmlite==0.43.0
+numba==0.60.0
+bitsandbytes
+outetts
+protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/outetts/requirements-hipblas.txt b/backend/python/outetts/requirements-hipblas.txt
new file mode 100644
index 000000000..1cd505db9
--- /dev/null
+++ b/backend/python/outetts/requirements-hipblas.txt
@@ -0,0 +1,8 @@
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+accelerate
+llvmlite==0.43.0
+numba==0.60.0
+bitsandbytes
+outetts
+protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/outetts/requirements-intel.txt b/backend/python/outetts/requirements-intel.txt
new file mode 100644
index 000000000..15dd184f2
--- /dev/null
+++ b/backend/python/outetts/requirements-intel.txt
@@ -0,0 +1,8 @@
+--extra-index-url https://download.pytorch.org/whl/xpu
+torch
+optimum[openvino]
+llvmlite==0.43.0
+numba==0.60.0
+bitsandbytes
+outetts
+protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/outetts/requirements.txt b/backend/python/outetts/requirements.txt
new file mode 100644
index 000000000..85d3d9a41
--- /dev/null
+++ b/backend/python/outetts/requirements.txt
@@ -0,0 +1,6 @@
+grpcio==1.76.0
+protobuf==6.33.5
+certifi
+setuptools
+scipy==1.15.1
+numpy>=2.0.0
\ No newline at end of file
diff --git a/backend/python/outetts/run.sh b/backend/python/outetts/run.sh
new file mode 100644
index 000000000..eae121f37
--- /dev/null
+++ b/backend/python/outetts/run.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+startBackend $@
diff --git a/backend/python/outetts/test.py b/backend/python/outetts/test.py
new file mode 100644
index 000000000..7400d684a
--- /dev/null
+++ b/backend/python/outetts/test.py
@@ -0,0 +1,35 @@
+"""
+Test script for the OuteTTS gRPC service.
+"""
+import unittest
+import subprocess
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+
+class TestBackendServicer(unittest.TestCase):
+    def setUp(self):
+        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        time.sleep(5)
+
+    def tearDown(self):
+        self.service.terminate()
+        self.service.wait()
+
+    def test_health(self):
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.Health(backend_pb2.HealthMessage())
+                self.assertEqual(response.message, b'OK')
+        except Exception as err:
+            self.fail(f"Health check failed: {err}")
+        finally:
+            self.tearDown()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/backend/python/outetts/test.sh b/backend/python/outetts/test.sh
new file mode 100644
index 000000000..eb59f2aaf
--- /dev/null
+++ b/backend/python/outetts/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -e
+
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+runUnittests
diff --git a/backend/python/rerankers/requirements-mps.txt b/backend/python/rerankers/requirements-mps.txt
new file mode 100644
index 000000000..4064c1c5c
--- /dev/null
+++ b/backend/python/rerankers/requirements-mps.txt
@@ -0,0 +1,4 @@
+torch==2.7.1
+transformers
+accelerate
+rerankers[transformers]
diff --git a/backend/python/rfdetr/requirements-mps.txt b/backend/python/rfdetr/requirements-mps.txt
new file mode 100644
index 000000000..a00e6d324
--- /dev/null
+++ b/backend/python/rfdetr/requirements-mps.txt
@@ -0,0 +1,7 @@
+torch==2.7.1
+rfdetr
+opencv-python
+accelerate
+peft
+inference
+optimum-quanto
diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py
index 05713b917..450664d6b 100644
--- a/backend/python/transformers/backend.py
+++ b/backend/python/transformers/backend.py
@@ -24,7 +24,6 @@ XPU=os.environ.get("XPU", "0") == "1"
 from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
 from transformers import AutoProcessor, MusicgenForConditionalGeneration, DiaForConditionalGeneration
 from scipy.io import wavfile
-import outetts
 from sentence_transformers import SentenceTransformer
 
@@ -89,7 +88,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         self.CUDA = torch.cuda.is_available()
         self.OV=False
-        self.OuteTTS=False
         self.DiaTTS=False
         self.SentenceTransformer = False
 
@@ -239,45 +237,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                     self.processor = self.processor.to("cuda")
                 print("DiaForConditionalGeneration loaded", file=sys.stderr)
                 self.DiaTTS = True
-            elif request.Type == "OuteTTS":
-                autoTokenizer = False
-                options = request.Options
-                MODELNAME = "OuteAI/OuteTTS-0.3-1B"
-                TOKENIZER = "OuteAI/OuteTTS-0.3-1B"
-                VERSION = "0.3"
-                SPEAKER = "en_male_1"
-                for opt in options:
-                    if opt.startswith("tokenizer:"):
-                        TOKENIZER = opt.split(":")[1]
-                        break
-                    if opt.startswith("version:"):
-                        VERSION = opt.split(":")[1]
-                        break
-                    if opt.startswith("speaker:"):
-                        SPEAKER = opt.split(":")[1]
-                        break
-
-                if model_name != "":
-                    MODELNAME = model_name
-
-                # Configure the model
-                model_config = outetts.HFModelConfig_v2(
-                    model_path=MODELNAME,
-                    tokenizer_path=TOKENIZER
-                )
-                # Initialize the interface
-                self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config)
-                self.OuteTTS = True
-
-                self.interface.print_default_speakers()
-                if request.AudioPath:
-                    if os.path.isabs(request.AudioPath):
-                        self.AudioPath = request.AudioPath
-                    else:
-                        self.AudioPath = os.path.join(request.ModelPath, request.AudioPath)
-                    self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
-                else:
-                    self.speaker = self.interface.load_default_speaker(name=SPEAKER)
             elif request.Type == "SentenceTransformer":
                 autoTokenizer = False
                 self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
@@ -588,30 +547,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         return backend_pb2.Result(success=True)
 
-    def CallOuteTTS(self, request, context):
-        try:
-            print("[OuteTTS] generating TTS", file=sys.stderr)
-            gen_cfg = outetts.GenerationConfig(
-                text="Speech synthesis is the artificial production of human speech.",
-                temperature=self.options.get("temperature", 0.1),
-                repetition_penalty=self.options.get("repetition_penalty", 1.1),
-                max_length=self.max_tokens,
-                speaker=self.speaker,
-                # voice_characteristics="upbeat enthusiasm, friendliness, clarity, professionalism, and trustworthiness"
-            )
-            output = self.interface.generate(config=gen_cfg)
-            print("[OuteTTS] Generated TTS", file=sys.stderr)
-            output.save(request.dst)
-            print("[OuteTTS] TTS done", file=sys.stderr)
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-        return backend_pb2.Result(success=True)
-
     # The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
     def TTS(self, request, context):
-        if self.OuteTTS:
-            return self.CallOuteTTS(request, context)
-
         if self.DiaTTS:
             print("DiaTTS", file=sys.stderr)
             return self.CallDiaTTS(request, context)
diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt
index 50eac2783..d6e385868 100644
--- a/backend/python/transformers/requirements-cpu.txt
+++ b/backend/python/transformers/requirements-cpu.txt
@@ -4,6 +4,5 @@ numba==0.60.0
 accelerate
 transformers
 bitsandbytes
-outetts
 sentence-transformers==5.2.2
 protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt
index ed5b2e6ab..028e4f191 100644
--- a/backend/python/transformers/requirements-cublas12.txt
+++ b/backend/python/transformers/requirements-cublas12.txt
@@ -4,6 +4,5 @@ llvmlite==0.43.0
 numba==0.60.0
 transformers
 bitsandbytes
-outetts
 sentence-transformers==5.2.2
 protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-cublas13.txt b/backend/python/transformers/requirements-cublas13.txt
index d670184ba..fa375851c 100644
--- a/backend/python/transformers/requirements-cublas13.txt
+++ b/backend/python/transformers/requirements-cublas13.txt
@@ -4,6 +4,5 @@ llvmlite==0.43.0
 numba==0.60.0
 transformers
 bitsandbytes
-outetts
 sentence-transformers==5.2.2
 protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt
index 7a868a05b..49ccf3f25 100644
--- a/backend/python/transformers/requirements-hipblas.txt
+++ b/backend/python/transformers/requirements-hipblas.txt
@@ -5,7 +5,5 @@ transformers
 llvmlite==0.43.0
 numba==0.60.0
 bitsandbytes
-outetts
-bitsandbytes
 sentence-transformers==5.2.2
 protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt
index af2ecaf24..233b84bd5 100644
--- a/backend/python/transformers/requirements-intel.txt
+++ b/backend/python/transformers/requirements-intel.txt
@@ -5,6 +5,5 @@ llvmlite==0.43.0
 numba==0.60.0
 transformers
 bitsandbytes
-outetts
 sentence-transformers==5.2.2
 protobuf==6.33.5
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-mps.txt b/backend/python/transformers/requirements-mps.txt
new file mode 100644
index 000000000..3439a720e
--- /dev/null
+++ b/backend/python/transformers/requirements-mps.txt
@@ -0,0 +1,8 @@
+torch==2.7.1
+llvmlite==0.43.0
+numba==0.60.0
+accelerate
+transformers
+bitsandbytes
+sentence-transformers==5.2.2
+protobuf==6.33.5
diff --git a/backend/python/whisperx/install.sh b/backend/python/whisperx/install.sh
index 9a4f7c733..7a3490001 100755
--- a/backend/python/whisperx/install.sh
+++ b/backend/python/whisperx/install.sh
@@ -8,6 +8,8 @@ else
     source $backend_dir/../common/libbackend.sh
 fi
 
-EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
+if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
+fi
 
 installRequirements
diff --git a/backend/python/whisperx/requirements-mps.txt b/backend/python/whisperx/requirements-mps.txt
new file mode 100644
index 000000000..5b6f2d82f
--- /dev/null
+++ b/backend/python/whisperx/requirements-mps.txt
@@ -0,0 +1,2 @@
+torch
+whisperx @ git+https://github.com/m-bain/whisperX.git
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 344e64fba..cd06707db 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1842,7 +1842,7 @@
     - gpu
     - text-to-speech
   overrides:
-    backend: "transformers"
+    backend: "outetts"
   name: "outetts"
   description: "OuteTTS is a 1.6B parameter text to speech model created by OuteAI."
   parameters: