Compare commits


5 Commits

Author SHA1 Message Date
Ettore Di Giacinto
5201b58d3e feat(mlx): Add support for CUDA12, CUDA13, L4T, SBSA and CPU (#8380)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-02-03 23:53:34 +01:00
LocalAI [bot]
8fa6737bdc chore(model gallery): 🤖 add 1 new models via gallery agent (#8381)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-02-03 22:40:22 +01:00
Ettore Di Giacinto
3039ced287 chore(ci): enlarge sleep startup time
Even if suboptimal, as we should really poll until the service becomes available, this should at least alleviate test flakiness for now

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-02-03 22:07:07 +01:00
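For reference, a minimal sketch of the polling approach this commit message alludes to, using gRPC's channel readiness helper instead of a fixed sleep; the address and timeout below are illustrative placeholders, not values taken from the test suite:

import grpc

def wait_for_backend(address: str = "localhost:50051", timeout: float = 30.0) -> None:
    # Block until the backend's gRPC port accepts connections, rather than sleeping a fixed amount.
    channel = grpc.insecure_channel(address)
    try:
        # channel_ready_future resolves once the channel is connected,
        # and raises grpc.FutureTimeoutError if the deadline passes first.
        grpc.channel_ready_future(channel).result(timeout=timeout)
    finally:
        channel.close()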
Ettore Di Giacinto
e7fc604dbc feat(metal): try to extend support to remaining backends (#8374)
* feat(metal): try to extend support to remaining backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* neutts doesn't work

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* split outetts out of transformers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Remove torch pin to whisperx

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-02-03 21:57:50 +01:00
Richard Palethorpe
5195062e12 fix(realtime): Include noAction function in prompt template and handle tool_choice (#8372)
The realtime endpoint was not passing the noAction "answer" function to the
model in the prompt template, causing the model to always call user-provided
tools even when a direct response was appropriate.

Root cause:
- User tools were added to the funcs list
- TemplateMessages() was called to generate the prompt
- noAction function was only added AFTER templating
- This meant the prompt didn't include the "answer" function, even though
  the grammar did

Fix:
- Move noAction function creation before TemplateMessages() call so it's
  included in both the prompt and grammar
- Add proper tool_choice parameter handling to support "auto", "required",
  "none", and specific function selection
- Match behavior of the standard chat endpoint
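For reference, a hedged sketch of the tool_choice values the handler now maps, expressed as standard chat-completions request payloads (which the commit says the realtime endpoint now matches); the model name and the example tool are placeholders, not taken from this change:

# A placeholder tool in the OpenAI-style function schema.
example_tool = {
    "type": "function",
    "function": {
        "name": "get_time",
        "description": "Return the current time",
        "parameters": {"type": "object", "properties": {}},
    },
}

base = {
    "model": "placeholder-model",
    "messages": [{"role": "user", "content": "What time is it?"}],
    "tools": [example_tool],
}

request_auto = {**base, "tool_choice": "auto"}          # model decides: call a tool or answer directly
request_required = {**base, "tool_choice": "required"}  # model must call one of the provided tools
request_none = {**base, "tool_choice": "none"}          # tools are ignored, model answers directly
request_forced = {**base, "tool_choice": {"type": "function", "function": {"name": "get_time"}}}  # force a specific function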

💘 Generated with Crush

Assisted-by: Claude Sonnet 4.5 via Crush <crush@charm.land>

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-02-03 14:30:37 +01:00
50 changed files with 1152 additions and 111 deletions

View File

@@ -104,6 +104,45 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-mlx'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-mlx-vlm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-mlx-audio'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
# CUDA 12 builds
- build-type: 'cublas'
cuda-major-version: "12"
@@ -300,6 +339,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-outetts'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "outetts"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -326,6 +378,45 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-mlx'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-mlx-vlm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-mlx-audio'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -574,6 +665,45 @@ jobs:
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-mlx'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-mlx-vlm'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-mlx-audio'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -639,6 +769,45 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-mlx'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-mlx-vlm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-mlx-audio'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -1045,6 +1214,45 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-mlx'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-mlx-vlm'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-mlx-audio'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
# SYCL additional backends
- build-type: 'intel'
cuda-major-version: ""
@@ -1562,6 +1770,19 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-outetts'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "outetts"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
backend-jobs-darwin:
uses: ./.github/workflows/backend_build_darwin.yml
strategy:
@@ -1590,6 +1811,64 @@ jobs:
tag-suffix: "-metal-darwin-arm64-whisper"
build-type: "metal"
lang: "go"
- backend: "vibevoice"
tag-suffix: "-metal-darwin-arm64-vibevoice"
build-type: "mps"
- backend: "qwen-asr"
tag-suffix: "-metal-darwin-arm64-qwen-asr"
build-type: "mps"
- backend: "qwen-tts"
tag-suffix: "-metal-darwin-arm64-qwen-tts"
build-type: "mps"
- backend: "voxcpm"
tag-suffix: "-metal-darwin-arm64-voxcpm"
build-type: "mps"
- backend: "pocket-tts"
tag-suffix: "-metal-darwin-arm64-pocket-tts"
build-type: "mps"
- backend: "moonshine"
tag-suffix: "-metal-darwin-arm64-moonshine"
build-type: "mps"
- backend: "whisperx"
tag-suffix: "-metal-darwin-arm64-whisperx"
build-type: "mps"
- backend: "rerankers"
tag-suffix: "-metal-darwin-arm64-rerankers"
build-type: "mps"
- backend: "transformers"
tag-suffix: "-metal-darwin-arm64-transformers"
build-type: "mps"
- backend: "kokoro"
tag-suffix: "-metal-darwin-arm64-kokoro"
build-type: "mps"
- backend: "faster-whisper"
tag-suffix: "-metal-darwin-arm64-faster-whisper"
build-type: "mps"
- backend: "coqui"
tag-suffix: "-metal-darwin-arm64-coqui"
build-type: "mps"
- backend: "rfdetr"
tag-suffix: "-metal-darwin-arm64-rfdetr"
build-type: "mps"
- backend: "kitten-tts"
tag-suffix: "-metal-darwin-arm64-kitten-tts"
build-type: "mps"
- backend: "piper"
tag-suffix: "-metal-darwin-arm64-piper"
build-type: "metal"
lang: "go"
- backend: "silero-vad"
tag-suffix: "-metal-darwin-arm64-silero-vad"
build-type: "metal"
lang: "go"
- backend: "local-store"
tag-suffix: "-metal-darwin-arm64-local-store"
build-type: "metal"
lang: "go"
- backend: "huggingface"
tag-suffix: "-metal-darwin-arm64-huggingface"
build-type: "metal"
lang: "go"
with:
backend: ${{ matrix.backend }}
build-type: ${{ matrix.build-type }}

View File

@@ -1,5 +1,5 @@
# Disable parallel execution for backend builds
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm backends/whisperx
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm backends/whisperx
GOCMD=go
GOTEST=$(GOCMD) test
@@ -308,6 +308,7 @@ protogen-go-clean:
prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/outetts
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/chatterbox
$(MAKE) -C backend/python/vllm
@@ -322,6 +323,7 @@ prepare-test-extra: protogen-python
test-extra: prepare-test-extra
$(MAKE) -C backend/python/transformers test
$(MAKE) -C backend/python/outetts test
$(MAKE) -C backend/python/diffusers test
$(MAKE) -C backend/python/chatterbox test
$(MAKE) -C backend/python/vllm test
@@ -451,6 +453,7 @@ BACKEND_WHISPER = whisper|golang|.|false|true
# Python backends with root context
BACKEND_RERANKERS = rerankers|python|.|false|true
BACKEND_TRANSFORMERS = transformers|python|.|false|true
BACKEND_OUTETTS = outetts|python|.|false|true
BACKEND_FASTER_WHISPER = faster-whisper|python|.|false|true
BACKEND_COQUI = coqui|python|.|false|true
BACKEND_RFDETR = rfdetr|python|.|false|true
@@ -499,6 +502,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_OUTETTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_FASTER_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_COQUI)))
$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR)))
@@ -521,7 +525,7 @@ $(eval $(call generate-docker-build-target,$(BACKEND_WHISPERX)))
docker-save-%: backend-images
docker save local-ai-backend:$* -o backend-images/$*.tar
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm docker-build-whisperx
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm docker-build-whisperx
########################################################
### Mock Backend for E2E Tests

View File

@@ -105,6 +105,7 @@
intel: "intel-rfdetr"
#amd: "rocm-rfdetr"
nvidia-l4t: "nvidia-l4t-arm64-rfdetr"
metal: "metal-rfdetr"
default: "cpu-rfdetr"
nvidia-cuda-13: "cuda13-rfdetr"
nvidia-cuda-12: "cuda12-rfdetr"
@@ -182,6 +183,15 @@
- text-to-text
- LLM
- MLX
capabilities:
default: "cpu-mlx"
nvidia: "cuda12-mlx"
metal: "metal-mlx"
nvidia-cuda-12: "cuda12-mlx"
nvidia-cuda-13: "cuda13-mlx"
nvidia-l4t: "nvidia-l4t-mlx"
nvidia-l4t-cuda-12: "nvidia-l4t-mlx"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-mlx"
- &mlx-vlm
name: "mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx-vlm"
@@ -199,6 +209,15 @@
- vision-language
- LLM
- MLX
capabilities:
default: "cpu-mlx-vlm"
nvidia: "cuda12-mlx-vlm"
metal: "metal-mlx-vlm"
nvidia-cuda-12: "cuda12-mlx-vlm"
nvidia-cuda-13: "cuda13-mlx-vlm"
nvidia-l4t: "nvidia-l4t-mlx-vlm"
nvidia-l4t-cuda-12: "nvidia-l4t-mlx-vlm"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-mlx-vlm"
- &mlx-audio
name: "mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx-audio"
@@ -216,6 +235,15 @@
- text-to-audio
- LLM
- MLX
capabilities:
default: "cpu-mlx-audio"
nvidia: "cuda12-mlx-audio"
metal: "metal-mlx-audio"
nvidia-cuda-12: "cuda12-mlx-audio"
nvidia-cuda-13: "cuda13-mlx-audio"
nvidia-l4t: "nvidia-l4t-mlx-audio"
nvidia-l4t-cuda-12: "nvidia-l4t-mlx-audio"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-mlx-audio"
- &rerankers
name: "rerankers"
alias: "rerankers"
@@ -223,6 +251,7 @@
nvidia: "cuda12-rerankers"
intel: "intel-rerankers"
amd: "rocm-rerankers"
metal: "metal-rerankers"
- &transformers
name: "transformers"
icon: https://avatars.githubusercontent.com/u/25720743?s=200&v=4
@@ -240,6 +269,7 @@
nvidia: "cuda12-transformers"
intel: "intel-transformers"
amd: "rocm-transformers"
metal: "metal-transformers"
nvidia-cuda-13: "cuda13-transformers"
nvidia-cuda-12: "cuda12-transformers"
- &diffusers
@@ -282,6 +312,7 @@
nvidia: "cuda12-faster-whisper"
intel: "intel-faster-whisper"
amd: "rocm-faster-whisper"
metal: "metal-faster-whisper"
nvidia-cuda-13: "cuda13-faster-whisper"
nvidia-cuda-12: "cuda12-faster-whisper"
- &moonshine
@@ -299,6 +330,7 @@
alias: "moonshine"
capabilities:
nvidia: "cuda12-moonshine"
metal: "metal-moonshine"
default: "cpu-moonshine"
nvidia-cuda-13: "cuda13-moonshine"
nvidia-cuda-12: "cuda12-moonshine"
@@ -318,6 +350,7 @@
capabilities:
nvidia: "cuda12-whisperx"
amd: "rocm-whisperx"
metal: "metal-whisperx"
default: "cpu-whisperx"
nvidia-cuda-13: "cuda13-whisperx"
nvidia-cuda-12: "cuda12-whisperx"
@@ -340,6 +373,7 @@
intel: "intel-kokoro"
amd: "rocm-kokoro"
nvidia-l4t: "nvidia-l4t-kokoro"
metal: "metal-kokoro"
nvidia-cuda-13: "cuda13-kokoro"
nvidia-cuda-12: "cuda12-kokoro"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-kokoro"
@@ -364,9 +398,25 @@
nvidia: "cuda12-coqui"
intel: "intel-coqui"
amd: "rocm-coqui"
metal: "metal-coqui"
nvidia-cuda-13: "cuda13-coqui"
nvidia-cuda-12: "cuda12-coqui"
icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
- &outetts
urls:
- https://github.com/OuteAI/outetts
description: |
OuteTTS is an open-weight text-to-speech model from OuteAI (OuteAI/OuteTTS-0.3-1B).
Supports custom speaker voices via audio path or default speakers.
tags:
- text-to-speech
- TTS
license: apache-2.0
name: "outetts"
alias: "outetts"
capabilities:
default: "cpu-outetts"
nvidia-cuda-12: "cuda12-outetts"
- &chatterbox
urls:
- https://github.com/resemble-ai/chatterbox
@@ -405,6 +455,7 @@
intel: "intel-vibevoice"
amd: "rocm-vibevoice"
nvidia-l4t: "nvidia-l4t-vibevoice"
metal: "metal-vibevoice"
default: "cpu-vibevoice"
nvidia-cuda-13: "cuda13-vibevoice"
nvidia-cuda-12: "cuda12-vibevoice"
@@ -427,6 +478,7 @@
intel: "intel-qwen-tts"
amd: "rocm-qwen-tts"
nvidia-l4t: "nvidia-l4t-qwen-tts"
metal: "metal-qwen-tts"
default: "cpu-qwen-tts"
nvidia-cuda-13: "cuda13-qwen-tts"
nvidia-cuda-12: "cuda12-qwen-tts"
@@ -449,6 +501,7 @@
intel: "intel-qwen-asr"
amd: "rocm-qwen-asr"
nvidia-l4t: "nvidia-l4t-qwen-asr"
metal: "metal-qwen-asr"
default: "cpu-qwen-asr"
nvidia-cuda-13: "cuda13-qwen-asr"
nvidia-cuda-12: "cuda12-qwen-asr"
@@ -470,6 +523,7 @@
nvidia: "cuda12-voxcpm"
intel: "intel-voxcpm"
amd: "rocm-voxcpm"
metal: "metal-voxcpm"
default: "cpu-voxcpm"
nvidia-cuda-13: "cuda13-voxcpm"
nvidia-cuda-12: "cuda12-voxcpm"
@@ -490,6 +544,7 @@
intel: "intel-pocket-tts"
amd: "rocm-pocket-tts"
nvidia-l4t: "nvidia-l4t-pocket-tts"
metal: "metal-pocket-tts"
default: "cpu-pocket-tts"
nvidia-cuda-13: "cuda13-pocket-tts"
nvidia-cuda-12: "cuda12-pocket-tts"
@@ -650,31 +705,234 @@
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx-audio"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-mlx-audio
## mlx
- !!merge <<: *mlx
name: "cpu-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-mlx"
mirrors:
- localai/localai-backends:latest-cpu-mlx
- !!merge <<: *mlx
name: "cpu-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-mlx"
mirrors:
- localai/localai-backends:master-cpu-mlx
- !!merge <<: *mlx
name: "cuda12-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-mlx"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-mlx
- !!merge <<: *mlx
name: "cuda12-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-mlx"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-mlx
- !!merge <<: *mlx
name: "cuda13-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-mlx"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-mlx
- !!merge <<: *mlx
name: "cuda13-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-mlx"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-mlx
- !!merge <<: *mlx
name: "nvidia-l4t-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-mlx"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-mlx
- !!merge <<: *mlx
name: "nvidia-l4t-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-mlx"
mirrors:
- localai/localai-backends:master-nvidia-l4t-mlx
- !!merge <<: *mlx
name: "cuda13-nvidia-l4t-arm64-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx
- !!merge <<: *mlx
name: "cuda13-nvidia-l4t-arm64-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-mlx"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-mlx
## mlx-vlm
- !!merge <<: *mlx-vlm
name: "cpu-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-mlx-vlm"
mirrors:
- localai/localai-backends:latest-cpu-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cpu-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-mlx-vlm"
mirrors:
- localai/localai-backends:master-cpu-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda12-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-mlx-vlm"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda12-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-mlx-vlm"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda13-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-mlx-vlm"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda13-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-mlx-vlm"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-mlx-vlm
- !!merge <<: *mlx-vlm
name: "nvidia-l4t-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-mlx-vlm"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-mlx-vlm
- !!merge <<: *mlx-vlm
name: "nvidia-l4t-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-mlx-vlm"
mirrors:
- localai/localai-backends:master-nvidia-l4t-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda13-nvidia-l4t-arm64-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx-vlm"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda13-nvidia-l4t-arm64-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-mlx-vlm"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-mlx-vlm
## mlx-audio
- !!merge <<: *mlx-audio
name: "cpu-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-mlx-audio"
mirrors:
- localai/localai-backends:latest-cpu-mlx-audio
- !!merge <<: *mlx-audio
name: "cpu-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-mlx-audio"
mirrors:
- localai/localai-backends:master-cpu-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda12-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-mlx-audio"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda12-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-mlx-audio"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda13-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-mlx-audio"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda13-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-mlx-audio"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-mlx-audio
- !!merge <<: *mlx-audio
name: "nvidia-l4t-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-mlx-audio"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-mlx-audio
- !!merge <<: *mlx-audio
name: "nvidia-l4t-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-mlx-audio"
mirrors:
- localai/localai-backends:master-nvidia-l4t-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda13-nvidia-l4t-arm64-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx-audio"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda13-nvidia-l4t-arm64-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-mlx-audio"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-mlx-audio
- !!merge <<: *kitten-tts
name: "kitten-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-kitten-tts"
mirrors:
- localai/localai-backends:master-kitten-tts
- !!merge <<: *kitten-tts
name: "metal-kitten-tts"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-kitten-tts"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-kitten-tts
- !!merge <<: *kitten-tts
name: "metal-kitten-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kitten-tts"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-kitten-tts
- !!merge <<: *huggingface
name: "huggingface-development"
uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
mirrors:
- localai/localai-backends:master-huggingface
- !!merge <<: *huggingface
name: "metal-huggingface"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-huggingface"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-huggingface
- !!merge <<: *huggingface
name: "metal-huggingface-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-huggingface"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-huggingface
- !!merge <<: *local-store
name: "local-store-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
mirrors:
- localai/localai-backends:master-cpu-local-store
- !!merge <<: *local-store
name: "metal-local-store"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-local-store"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-local-store
- !!merge <<: *local-store
name: "metal-local-store-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-local-store"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-local-store
- !!merge <<: *silero-vad
name: "silero-vad-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
mirrors:
- localai/localai-backends:master-cpu-silero-vad
- !!merge <<: *silero-vad
name: "metal-silero-vad"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-silero-vad"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-silero-vad
- !!merge <<: *silero-vad
name: "metal-silero-vad-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-silero-vad"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-silero-vad
- !!merge <<: *piper
name: "piper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-piper"
mirrors:
- localai/localai-backends:master-piper
- !!merge <<: *piper
name: "metal-piper"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-piper"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-piper
- !!merge <<: *piper
name: "metal-piper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-piper"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-piper
## llama-cpp
- !!merge <<: *llamacpp
name: "nvidia-l4t-arm64-llama-cpp"
@@ -1048,6 +1306,7 @@
intel: "intel-rfdetr-development"
#amd: "rocm-rfdetr-development"
nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development"
metal: "metal-rfdetr-development"
default: "cpu-rfdetr-development"
nvidia-cuda-13: "cuda13-rfdetr-development"
- !!merge <<: *rfdetr
@@ -1115,6 +1374,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rfdetr"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-rfdetr
- !!merge <<: *rfdetr
name: "metal-rfdetr"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-rfdetr"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-rfdetr
- !!merge <<: *rfdetr
name: "metal-rfdetr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rfdetr"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-rfdetr
## Rerankers
- !!merge <<: *rerankers
name: "rerankers-development"
@@ -1122,6 +1391,7 @@
nvidia: "cuda12-rerankers-development"
intel: "intel-rerankers-development"
amd: "rocm-rerankers-development"
metal: "metal-rerankers-development"
nvidia-cuda-13: "cuda13-rerankers-development"
- !!merge <<: *rerankers
name: "cuda12-rerankers"
@@ -1163,6 +1433,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rerankers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-rerankers
- !!merge <<: *rerankers
name: "metal-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-rerankers"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-rerankers
- !!merge <<: *rerankers
name: "metal-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rerankers"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-rerankers
## Transformers
- !!merge <<: *transformers
name: "transformers-development"
@@ -1170,6 +1450,7 @@
nvidia: "cuda12-transformers-development"
intel: "intel-transformers-development"
amd: "rocm-transformers-development"
metal: "metal-transformers-development"
nvidia-cuda-13: "cuda13-transformers-development"
- !!merge <<: *transformers
name: "cuda12-transformers"
@@ -1211,6 +1492,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-transformers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-transformers
- !!merge <<: *transformers
name: "metal-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-transformers"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-transformers
- !!merge <<: *transformers
name: "metal-transformers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-transformers"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-transformers
## Diffusers
- !!merge <<: *diffusers
name: "diffusers-development"
@@ -1310,6 +1601,7 @@
intel: "intel-kokoro-development"
amd: "rocm-kokoro-development"
nvidia-l4t: "nvidia-l4t-kokoro-development"
metal: "metal-kokoro-development"
- !!merge <<: *kokoro
name: "cuda12-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro"
@@ -1360,6 +1652,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-kokoro"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-kokoro
- !!merge <<: *kokoro
name: "metal-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-kokoro"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-kokoro
- !!merge <<: *kokoro
name: "metal-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kokoro"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-kokoro
## faster-whisper
- !!merge <<: *faster-whisper
name: "faster-whisper-development"
@@ -1367,6 +1669,7 @@
nvidia: "cuda12-faster-whisper-development"
intel: "intel-faster-whisper-development"
amd: "rocm-faster-whisper-development"
metal: "metal-faster-whisper-development"
nvidia-cuda-13: "cuda13-faster-whisper-development"
- !!merge <<: *faster-whisper
name: "cuda12-faster-whisper-development"
@@ -1398,6 +1701,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-faster-whisper"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-faster-whisper
- !!merge <<: *faster-whisper
name: "metal-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-faster-whisper"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-faster-whisper
- !!merge <<: *faster-whisper
name: "metal-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-faster-whisper"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-faster-whisper
## moonshine
- !!merge <<: *moonshine
name: "moonshine-development"
@@ -1436,12 +1749,23 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-moonshine"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-moonshine
- !!merge <<: *moonshine
name: "metal-moonshine"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-moonshine"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-moonshine
- !!merge <<: *moonshine
name: "metal-moonshine-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-moonshine"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-moonshine
## whisperx
- !!merge <<: *whisperx
name: "whisperx-development"
capabilities:
nvidia: "cuda12-whisperx-development"
amd: "rocm-whisperx-development"
metal: "metal-whisperx-development"
default: "cpu-whisperx-development"
nvidia-cuda-13: "cuda13-whisperx-development"
nvidia-cuda-12: "cuda12-whisperx-development"
@@ -1485,6 +1809,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-whisperx"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-whisperx
- !!merge <<: *whisperx
name: "metal-whisperx"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisperx"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-whisperx
- !!merge <<: *whisperx
name: "metal-whisperx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisperx"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-whisperx
## coqui
- !!merge <<: *coqui
@@ -1493,6 +1827,7 @@
nvidia: "cuda12-coqui-development"
intel: "intel-coqui-development"
amd: "rocm-coqui-development"
metal: "metal-coqui-development"
- !!merge <<: *coqui
name: "cuda12-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui"
@@ -1523,6 +1858,42 @@
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-coqui
- !!merge <<: *coqui
name: "metal-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-coqui"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-coqui
- !!merge <<: *coqui
name: "metal-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-coqui"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-coqui
## outetts
- !!merge <<: *outetts
name: "outetts-development"
capabilities:
default: "cpu-outetts-development"
nvidia-cuda-12: "cuda12-outetts-development"
- !!merge <<: *outetts
name: "cpu-outetts"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-outetts"
mirrors:
- localai/localai-backends:latest-cpu-outetts
- !!merge <<: *outetts
name: "cpu-outetts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-outetts"
mirrors:
- localai/localai-backends:master-cpu-outetts
- !!merge <<: *outetts
name: "cuda12-outetts"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-outetts"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-outetts
- !!merge <<: *outetts
name: "cuda12-outetts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-outetts"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-outetts
## chatterbox
- !!merge <<: *chatterbox
name: "chatterbox-development"
@@ -1603,6 +1974,7 @@
intel: "intel-vibevoice-development"
amd: "rocm-vibevoice-development"
nvidia-l4t: "nvidia-l4t-vibevoice-development"
metal: "metal-vibevoice-development"
default: "cpu-vibevoice-development"
nvidia-cuda-13: "cuda13-vibevoice-development"
nvidia-cuda-12: "cuda12-vibevoice-development"
@@ -1678,6 +2050,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice
- !!merge <<: *vibevoice
name: "metal-vibevoice"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-vibevoice"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-vibevoice
- !!merge <<: *vibevoice
name: "metal-vibevoice-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-vibevoice"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-vibevoice
## qwen-tts
- !!merge <<: *qwen-tts
name: "qwen-tts-development"
@@ -1686,6 +2068,7 @@
intel: "intel-qwen-tts-development"
amd: "rocm-qwen-tts-development"
nvidia-l4t: "nvidia-l4t-qwen-tts-development"
metal: "metal-qwen-tts-development"
default: "cpu-qwen-tts-development"
nvidia-cuda-13: "cuda13-qwen-tts-development"
nvidia-cuda-12: "cuda12-qwen-tts-development"
@@ -1761,6 +2144,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-tts"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-tts
- !!merge <<: *qwen-tts
name: "metal-qwen-tts"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-qwen-tts"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-qwen-tts
- !!merge <<: *qwen-tts
name: "metal-qwen-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen-tts"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-qwen-tts
## qwen-asr
- !!merge <<: *qwen-asr
name: "qwen-asr-development"
@@ -1769,6 +2162,7 @@
intel: "intel-qwen-asr-development"
amd: "rocm-qwen-asr-development"
nvidia-l4t: "nvidia-l4t-qwen-asr-development"
metal: "metal-qwen-asr-development"
default: "cpu-qwen-asr-development"
nvidia-cuda-13: "cuda13-qwen-asr-development"
nvidia-cuda-12: "cuda12-qwen-asr-development"
@@ -1844,6 +2238,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-asr"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-asr
- !!merge <<: *qwen-asr
name: "metal-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-qwen-asr"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-qwen-asr
- !!merge <<: *qwen-asr
name: "metal-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen-asr"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-qwen-asr
## voxcpm
- !!merge <<: *voxcpm
name: "voxcpm-development"
@@ -1851,6 +2255,7 @@
nvidia: "cuda12-voxcpm-development"
intel: "intel-voxcpm-development"
amd: "rocm-voxcpm-development"
metal: "metal-voxcpm-development"
default: "cpu-voxcpm-development"
nvidia-cuda-13: "cuda13-voxcpm-development"
nvidia-cuda-12: "cuda12-voxcpm-development"
@@ -1904,6 +2309,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-voxcpm"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-voxcpm
- !!merge <<: *voxcpm
name: "metal-voxcpm"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-voxcpm"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-voxcpm
- !!merge <<: *voxcpm
name: "metal-voxcpm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-voxcpm"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-voxcpm
## pocket-tts
- !!merge <<: *pocket-tts
name: "pocket-tts-development"
@@ -1912,6 +2327,7 @@
intel: "intel-pocket-tts-development"
amd: "rocm-pocket-tts-development"
nvidia-l4t: "nvidia-l4t-pocket-tts-development"
metal: "metal-pocket-tts-development"
default: "cpu-pocket-tts-development"
nvidia-cuda-13: "cuda13-pocket-tts-development"
nvidia-cuda-12: "cuda12-pocket-tts-development"
@@ -1987,3 +2403,13 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-pocket-tts"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-pocket-tts
- !!merge <<: *pocket-tts
name: "metal-pocket-tts"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-pocket-tts"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-pocket-tts
- !!merge <<: *pocket-tts
name: "metal-pocket-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-pocket-tts"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-pocket-tts

View File

@@ -0,0 +1,7 @@
torch
torchaudio
accelerate
numpy>=1.24.0,<1.26.0
transformers
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster

View File

@@ -0,0 +1,4 @@
torch==2.7.1
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -0,0 +1,8 @@
torch==2.7.1
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -0,0 +1,5 @@
grpcio==1.71.0
protobuf
certifi
packaging==24.1
https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl

View File

@@ -0,0 +1,5 @@
torch==2.7.1
transformers
accelerate
kokoro
soundfile

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cpu]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cuda12]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cuda13]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cuda12]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cuda13]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cpu]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cuda12]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cuda13]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cuda12]

View File

@@ -0,0 +1,2 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cuda13]

View File

@@ -0,0 +1,2 @@
mlx-lm
mlx[cpu]

View File

@@ -0,0 +1,2 @@
mlx-lm
mlx[cuda12]

View File

@@ -0,0 +1,2 @@
mlx-lm
mlx[cuda13]

View File

@@ -0,0 +1,2 @@
mlx-lm
mlx[cuda12]

View File

@@ -0,0 +1,2 @@
mlx-lm
mlx[cuda13]

View File

@@ -0,0 +1,4 @@
grpcio==1.71.0
protobuf
grpcio-tools
useful-moonshine-onnx@git+https://git@github.com/moonshine-ai/moonshine.git#subdirectory=moonshine-onnx

View File

@@ -0,0 +1,23 @@
.PHONY: outetts
outetts:
bash install.sh
.PHONY: run
run: outetts
@echo "Running outetts..."
bash run.sh
@echo "outetts run."
.PHONY: test
test: outetts
@echo "Testing outetts..."
bash test.sh
@echo "outetts tested."
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""
gRPC server for OuteTTS (OuteAI TTS) models.
"""
from concurrent import futures
import argparse
import signal
import sys
import os
import asyncio
import backend_pb2
import backend_pb2_grpc
import grpc
import outetts
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
model_name = request.Model
if os.path.exists(request.ModelFile):
model_name = request.ModelFile
self.options = {}
for opt in request.Options:
if ":" not in opt:
continue
key, value = opt.split(":", 1)
try:
if "." in value:
value = float(value)
else:
value = int(value)
except ValueError:
pass
self.options[key] = value
MODELNAME = "OuteAI/OuteTTS-0.3-1B"
TOKENIZER = "OuteAI/OuteTTS-0.3-1B"
VERSION = "0.3"
SPEAKER = "en_male_1"
for opt in request.Options:
if opt.startswith("tokenizer:"):
TOKENIZER = opt.split(":")[1]
break
if opt.startswith("version:"):
VERSION = opt.split(":")[1]
break
if opt.startswith("speaker:"):
SPEAKER = opt.split(":")[1]
break
if model_name != "":
MODELNAME = model_name
try:
model_config = outetts.HFModelConfig_v2(
model_path=MODELNAME,
tokenizer_path=TOKENIZER
)
self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config)
self.interface.print_default_speakers()
if request.AudioPath:
if os.path.isabs(request.AudioPath):
self.AudioPath = request.AudioPath
else:
self.AudioPath = os.path.join(request.ModelPath, request.AudioPath)
self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
else:
self.speaker = self.interface.load_default_speaker(name=SPEAKER)
if request.ContextSize > 0:
self.max_tokens = request.ContextSize
else:
self.max_tokens = self.options.get("max_new_tokens", 512)
except Exception as err:
print("Error:", err, file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)
def TTS(self, request, context):
try:
text = request.text if request.text else "Speech synthesis is the artificial production of human speech."
print("[OuteTTS] generating TTS", file=sys.stderr)
gen_cfg = outetts.GenerationConfig(
text=text,
temperature=self.options.get("temperature", 0.1),
repetition_penalty=self.options.get("repetition_penalty", 1.1),
max_length=self.max_tokens,
speaker=self.speaker,
)
output = self.interface.generate(config=gen_cfg)
print("[OuteTTS] Generated TTS", file=sys.stderr)
output.save(request.dst)
print("[OuteTTS] TTS done", file=sys.stderr)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(success=True)
async def serve(address):
server = grpc.aio.server(
migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024),
('grpc.max_send_message_length', 50 * 1024 * 1024),
('grpc.max_receive_message_length', 50 * 1024 * 1024),
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
loop = asyncio.get_event_loop()
for sig in (signal.SIGINT, signal.SIGTERM):
loop.add_signal_handler(
sig, lambda: asyncio.ensure_future(server.stop(5))
)
await server.start()
print("Server started. Listening on: " + address, file=sys.stderr)
await server.wait_for_termination()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the OuteTTS gRPC server.")
parser.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
args = parser.parse_args()
asyncio.run(serve(args.addr))

View File

@@ -0,0 +1,11 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
installRequirements

View File

@@ -0,0 +1,7 @@
torch==2.7.1
llvmlite==0.43.0
numba==0.60.0
accelerate
bitsandbytes
outetts
protobuf==6.33.5

View File

@@ -0,0 +1,7 @@
torch==2.7.1
accelerate
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
protobuf==6.33.5
outetts

View File

@@ -0,0 +1,7 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch==2.9.0
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
outetts
protobuf==6.33.5

View File

@@ -0,0 +1,8 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4
torch==2.8.0+rocm6.4
accelerate
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
outetts
protobuf==6.33.5

View File

@@ -0,0 +1,8 @@
--extra-index-url https://download.pytorch.org/whl/xpu
torch
optimum[openvino]
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
outetts
protobuf==6.33.5

View File

@@ -0,0 +1,6 @@
grpcio==1.76.0
protobuf==6.33.5
certifi
setuptools
scipy==1.15.1
numpy>=2.0.0

View File

@@ -0,0 +1,9 @@
#!/bin/bash
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
startBackend $@

View File

@@ -0,0 +1,35 @@
"""
Test script for the OuteTTS gRPC service.
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc
import grpc
class TestBackendServicer(unittest.TestCase):
def setUp(self):
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
time.sleep(5)
def tearDown(self):
self.service.terminate()
self.service.wait()
def test_health(self):
try:
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.Health(backend_pb2.HealthMessage())
self.assertEqual(response.message, b'OK')
except Exception as err:
self.fail(f"Health check failed: {err}")
finally:
self.tearDown()
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,11 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
runUnittests

View File

@@ -28,7 +28,7 @@ class TestBackendServicer(unittest.TestCase):
stderr=subprocess.PIPE,
text=True
)
time.sleep(5)
time.sleep(30)
def tearDown(self) -> None:
"""

View File

@@ -0,0 +1,4 @@
torch==2.7.1
transformers
accelerate
rerankers[transformers]

View File

@@ -0,0 +1,7 @@
torch==2.7.1
rfdetr
opencv-python
accelerate
peft
inference
optimum-quanto

View File

@@ -24,7 +24,6 @@ XPU=os.environ.get("XPU", "0") == "1"
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
from transformers import AutoProcessor, MusicgenForConditionalGeneration, DiaForConditionalGeneration
from scipy.io import wavfile
import outetts
from sentence_transformers import SentenceTransformer
@@ -89,7 +88,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.CUDA = torch.cuda.is_available()
self.OV=False
self.OuteTTS=False
self.DiaTTS=False
self.SentenceTransformer = False
@@ -239,45 +237,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.processor = self.processor.to("cuda")
print("DiaForConditionalGeneration loaded", file=sys.stderr)
self.DiaTTS = True
elif request.Type == "OuteTTS":
autoTokenizer = False
options = request.Options
MODELNAME = "OuteAI/OuteTTS-0.3-1B"
TOKENIZER = "OuteAI/OuteTTS-0.3-1B"
VERSION = "0.3"
SPEAKER = "en_male_1"
for opt in options:
if opt.startswith("tokenizer:"):
TOKENIZER = opt.split(":")[1]
break
if opt.startswith("version:"):
VERSION = opt.split(":")[1]
break
if opt.startswith("speaker:"):
SPEAKER = opt.split(":")[1]
break
if model_name != "":
MODELNAME = model_name
# Configure the model
model_config = outetts.HFModelConfig_v2(
model_path=MODELNAME,
tokenizer_path=TOKENIZER
)
# Initialize the interface
self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config)
self.OuteTTS = True
self.interface.print_default_speakers()
if request.AudioPath:
if os.path.isabs(request.AudioPath):
self.AudioPath = request.AudioPath
else:
self.AudioPath = os.path.join(request.ModelPath, request.AudioPath)
self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
else:
self.speaker = self.interface.load_default_speaker(name=SPEAKER)
elif request.Type == "SentenceTransformer":
autoTokenizer = False
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
@@ -588,30 +547,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Result(success=True)
def CallOuteTTS(self, request, context):
try:
print("[OuteTTS] generating TTS", file=sys.stderr)
gen_cfg = outetts.GenerationConfig(
text="Speech synthesis is the artificial production of human speech.",
temperature=self.options.get("temperature", 0.1),
repetition_penalty=self.options.get("repetition_penalty", 1.1),
max_length=self.max_tokens,
speaker=self.speaker,
# voice_characteristics="upbeat enthusiasm, friendliness, clarity, professionalism, and trustworthiness"
)
output = self.interface.generate(config=gen_cfg)
print("[OuteTTS] Generated TTS", file=sys.stderr)
output.save(request.dst)
print("[OuteTTS] TTS done", file=sys.stderr)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(success=True)
# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
def TTS(self, request, context):
if self.OuteTTS:
return self.CallOuteTTS(request, context)
if self.DiaTTS:
print("DiaTTS", file=sys.stderr)
return self.CallDiaTTS(request, context)

View File

@@ -4,6 +4,5 @@ numba==0.60.0
accelerate
transformers
bitsandbytes
outetts
sentence-transformers==5.2.2
protobuf==6.33.5

View File

@@ -4,6 +4,5 @@ llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
outetts
sentence-transformers==5.2.2
protobuf==6.33.5

View File

@@ -4,6 +4,5 @@ llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
outetts
sentence-transformers==5.2.2
protobuf==6.33.5

View File

@@ -5,7 +5,5 @@ transformers
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
outetts
bitsandbytes
sentence-transformers==5.2.2
protobuf==6.33.5

View File

@@ -5,6 +5,5 @@ llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
outetts
sentence-transformers==5.2.2
protobuf==6.33.5

View File

@@ -0,0 +1,8 @@
torch==2.7.1
llvmlite==0.43.0
numba==0.60.0
accelerate
transformers
bitsandbytes
sentence-transformers==5.2.2
protobuf==6.33.5

View File

@@ -8,6 +8,8 @@ else
source $backend_dir/../common/libbackend.sh
fi
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
fi
installRequirements

View File

@@ -0,0 +1,2 @@
torch
whisperx @ git+https://github.com/m-bain/whisperX.git

View File

@@ -28,7 +28,7 @@ type wrappedModel struct {
TTSConfig *config.ModelConfig
TranscriptionConfig *config.ModelConfig
LLMConfig *config.ModelConfig
VADConfig *config.ModelConfig
VADConfig *config.ModelConfig
appConfig *config.ApplicationConfig
modelLoader *model.ModelLoader
@@ -114,6 +114,35 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im
})
}
}
// Add noAction function before templating so it's included in the prompt
// Allow the user to set custom actions via config file
noActionName := "answer"
noActionDescription := "use this action to answer without performing any action"
if m.LLMConfig.FunctionsConfig.NoActionFunctionName != "" {
noActionName = m.LLMConfig.FunctionsConfig.NoActionFunctionName
}
if m.LLMConfig.FunctionsConfig.NoActionDescriptionName != "" {
noActionDescription = m.LLMConfig.FunctionsConfig.NoActionDescriptionName
}
noActionGrammar := functions.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
"properties": map[string]interface{}{
"message": map[string]interface{}{
"type": "string",
"description": "The message to reply the user with",
},
},
},
}
if !m.LLMConfig.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
}
predInput = m.evaluator.TemplateMessages(input, input.Messages, m.LLMConfig, funcs, len(funcs) > 0)
@@ -124,38 +153,29 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im
}
}
// Handle tool_choice parameter similar to the chat endpoint
if toolChoice != nil {
if toolChoice.Mode != "" {
// String values: "auto", "required", "none"
switch toolChoice.Mode {
case types.ToolChoiceModeRequired:
m.LLMConfig.SetFunctionCallString("required")
case types.ToolChoiceModeNone:
// Don't use tools
m.LLMConfig.SetFunctionCallString("none")
case types.ToolChoiceModeAuto:
// Default behavior - let model decide
}
} else if toolChoice.Function != nil {
// Specific function specified
m.LLMConfig.SetFunctionCallString(toolChoice.Function.Name)
}
}
// Generate grammar for function calling if tools are provided and grammar generation is enabled
shouldUseFn := len(tools) > 0 && m.LLMConfig.ShouldUseFunctions()
if !m.LLMConfig.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn {
// Allow the user to set custom actions via config file
noActionName := "answer"
noActionDescription := "use this action to answer without performing any action"
if m.LLMConfig.FunctionsConfig.NoActionFunctionName != "" {
noActionName = m.LLMConfig.FunctionsConfig.NoActionFunctionName
}
if m.LLMConfig.FunctionsConfig.NoActionDescriptionName != "" {
noActionDescription = m.LLMConfig.FunctionsConfig.NoActionDescriptionName
}
noActionGrammar := functions.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
"properties": map[string]interface{}{
"message": map[string]interface{}{
"type": "string",
"description": "The message to reply the user with",
},
},
},
}
if !m.LLMConfig.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
// Force picking one of the functions by the request
if m.LLMConfig.FunctionToCall() != "" {
funcs = functions.Functions(funcs).Select(m.LLMConfig.FunctionToCall())
@@ -184,7 +204,7 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im
toolChoiceJSON = string(b)
}
return backend.ModelInference(ctx, predInput, messages, images, videos, audios, m.modelLoader, m.LLMConfig, m.confLoader, m.appConfig, tokenCallback, toolsJSON, toolChoiceJSON, logprobs, topLogprobs, logitBias, )
return backend.ModelInference(ctx, predInput, messages, images, videos, audios, m.modelLoader, m.LLMConfig, m.confLoader, m.appConfig, tokenCallback, toolsJSON, toolChoiceJSON, logprobs, topLogprobs, logitBias)
}
func (m *wrappedModel) TTS(ctx context.Context, text, voice, language string) (string, *proto.Result, error) {
@@ -218,11 +238,11 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
return &transcriptOnlyModel{
TranscriptionConfig: cfgSST,
VADConfig: cfgVAD,
VADConfig: cfgVAD,
confLoader: cl,
confLoader: cl,
modelLoader: ml,
appConfig: appConfig,
appConfig: appConfig,
}, cfgSST, nil
}
@@ -297,11 +317,11 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
TTSConfig: cfgTTS,
TranscriptionConfig: cfgSST,
LLMConfig: cfgLLM,
VADConfig: cfgVAD,
VADConfig: cfgVAD,
confLoader: cl,
confLoader: cl,
modelLoader: ml,
appConfig: appConfig,
evaluator: evaluator,
appConfig: appConfig,
evaluator: evaluator,
}, nil
}

View File

@@ -1,4 +1,29 @@
---
- name: "qwen3-coder-next-mxfp4_moe"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/noctrex/Qwen3-Coder-Next-MXFP4_MOE-GGUF
description: |
The model is a quantized version of **Qwen/Qwen3-Coder-Next** (base model) using the **MXFP4** quantization scheme. It is optimized for efficiency while retaining performance, suitable for deployment in applications requiring lightweight inference. The quantized version is tailored for specific tasks, with parameters like temperature=1.0 and top_p=0.95 recommended for generation.
overrides:
parameters:
model: llama-cpp/models/Qwen3-Coder-Next-MXFP4_MOE.gguf
name: Qwen3-Coder-Next-MXFP4_MOE-GGUF
backend: llama-cpp
template:
use_tokenizer_template: true
known_usecases:
- chat
function:
grammar:
disable: true
description: Imported from https://huggingface.co/noctrex/Qwen3-Coder-Next-MXFP4_MOE-GGUF
options:
- use_jinja:true
files:
- filename: llama-cpp/models/Qwen3-Coder-Next-MXFP4_MOE.gguf
sha256: fa356439e87010163778b7eab5f2b07e0e5b7f2cd9aac78b069139f5ae069414
uri: https://huggingface.co/noctrex/Qwen3-Coder-Next-MXFP4_MOE-GGUF/resolve/main/Qwen3-Coder-Next-MXFP4_MOE.gguf
- name: "deepseek-ai.deepseek-v3.2"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
@@ -1842,7 +1867,7 @@
- gpu
- text-to-speech
overrides:
backend: "transformers"
backend: "outetts"
name: "outetts"
description: "OuteTTS is a 1.6B parameter text to speech model created by OuteAI."
parameters: