Compare commits


1 commit

Author: copilot-swe-agent[bot] · SHA1: 5041294265 · Message: Initial plan · Date: 2026-02-02 22:01:37 +00:00
125 changed files with 258 additions and 3188 deletions

View File

@@ -14,7 +14,6 @@ concurrency:
jobs:
backend-jobs:
if: github.repository == 'mudler/LocalAI'
uses: ./.github/workflows/backend_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
@@ -105,58 +104,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-ace-step'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "ace-step"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-mlx'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-mlx-vlm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-mlx-audio'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
# CUDA 12 builds
- build-type: 'cublas'
cuda-major-version: "12"
@@ -301,19 +248,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-ace-step'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "ace-step"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -366,19 +300,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-outetts'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "outetts"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -405,45 +326,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-mlx'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-mlx-vlm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-mlx-audio'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
@@ -627,19 +509,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-ace-step'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "ace-step"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -705,45 +574,6 @@ jobs:
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-mlx'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-mlx-vlm'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'l4t'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-cuda-13-arm64-mlx-audio'
runs-on: 'ubuntu-24.04-arm'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
ubuntu-version: '2404'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -809,45 +639,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-mlx'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-mlx-vlm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-13-mlx-audio'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'false'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'cublas'
cuda-major-version: "13"
cuda-minor-version: "0"
@@ -992,19 +783,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-ace-step'
runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
skip-drivers: 'false'
backend: "ace-step"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
# ROCm additional backends
- build-type: 'hipblas'
cuda-major-version: ""
@@ -1202,19 +980,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'intel'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-ace-step'
runs-on: 'ubuntu-latest'
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
skip-drivers: 'false'
backend: "ace-step"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
@@ -1280,45 +1045,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-mlx'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "mlx"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-mlx-vlm'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "mlx-vlm"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-mlx-audio'
runs-on: 'ubuntu-24.04-arm'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
skip-drivers: 'true'
backend: "mlx-audio"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2204'
# SYCL additional backends
- build-type: 'intel'
cuda-major-version: ""
@@ -1836,19 +1562,6 @@ jobs:
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-outetts'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:24.04"
skip-drivers: 'true'
backend: "outetts"
dockerfile: "./backend/Dockerfile.python"
context: "./"
ubuntu-version: '2404'
backend-jobs-darwin:
uses: ./.github/workflows/backend_build_darwin.yml
strategy:
@@ -1857,9 +1570,6 @@ jobs:
- backend: "diffusers"
tag-suffix: "-metal-darwin-arm64-diffusers"
build-type: "mps"
- backend: "ace-step"
tag-suffix: "-metal-darwin-arm64-ace-step"
build-type: "mps"
- backend: "mlx"
tag-suffix: "-metal-darwin-arm64-mlx"
build-type: "mps"
@@ -1880,64 +1590,6 @@ jobs:
tag-suffix: "-metal-darwin-arm64-whisper"
build-type: "metal"
lang: "go"
- backend: "vibevoice"
tag-suffix: "-metal-darwin-arm64-vibevoice"
build-type: "mps"
- backend: "qwen-asr"
tag-suffix: "-metal-darwin-arm64-qwen-asr"
build-type: "mps"
- backend: "qwen-tts"
tag-suffix: "-metal-darwin-arm64-qwen-tts"
build-type: "mps"
- backend: "voxcpm"
tag-suffix: "-metal-darwin-arm64-voxcpm"
build-type: "mps"
- backend: "pocket-tts"
tag-suffix: "-metal-darwin-arm64-pocket-tts"
build-type: "mps"
- backend: "moonshine"
tag-suffix: "-metal-darwin-arm64-moonshine"
build-type: "mps"
- backend: "whisperx"
tag-suffix: "-metal-darwin-arm64-whisperx"
build-type: "mps"
- backend: "rerankers"
tag-suffix: "-metal-darwin-arm64-rerankers"
build-type: "mps"
- backend: "transformers"
tag-suffix: "-metal-darwin-arm64-transformers"
build-type: "mps"
- backend: "kokoro"
tag-suffix: "-metal-darwin-arm64-kokoro"
build-type: "mps"
- backend: "faster-whisper"
tag-suffix: "-metal-darwin-arm64-faster-whisper"
build-type: "mps"
- backend: "coqui"
tag-suffix: "-metal-darwin-arm64-coqui"
build-type: "mps"
- backend: "rfdetr"
tag-suffix: "-metal-darwin-arm64-rfdetr"
build-type: "mps"
- backend: "kitten-tts"
tag-suffix: "-metal-darwin-arm64-kitten-tts"
build-type: "mps"
- backend: "piper"
tag-suffix: "-metal-darwin-arm64-piper"
build-type: "metal"
lang: "go"
- backend: "silero-vad"
tag-suffix: "-metal-darwin-arm64-silero-vad"
build-type: "metal"
lang: "go"
- backend: "local-store"
tag-suffix: "-metal-darwin-arm64-local-store"
build-type: "metal"
lang: "go"
- backend: "huggingface"
tag-suffix: "-metal-darwin-arm64-huggingface"
build-type: "metal"
lang: "go"
with:
backend: ${{ matrix.backend }}
build-type: ${{ matrix.build-type }}

View File

@@ -5,7 +5,6 @@ on:
workflow_dispatch:
jobs:
bump-backends:
if: github.repository == 'mudler/LocalAI'
strategy:
fail-fast: false
matrix:

View File

@@ -5,7 +5,6 @@ on:
workflow_dispatch:
jobs:
bump-docs:
if: github.repository == 'mudler/LocalAI'
strategy:
fail-fast: false
matrix:

View File

@@ -5,7 +5,6 @@ on:
workflow_dispatch:
jobs:
checksum_check:
if: github.repository == 'mudler/LocalAI'
runs-on: ubuntu-latest
steps:
- name: Force Install GIT latest

View File

@@ -9,8 +9,8 @@ permissions:
jobs:
dependabot:
if: github.repository == 'mudler/LocalAI' && github.actor == 'dependabot[bot]'
runs-on: ubuntu-latest
if: ${{ github.actor == 'dependabot[bot]' }}
steps:
- name: Dependabot metadata
id: metadata

View File

@@ -12,7 +12,6 @@ concurrency:
jobs:
build-linux:
if: github.repository == 'mudler/LocalAI'
runs-on: ubuntu-latest
steps:
- name: Clone
@@ -34,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build
- name: rm
uses: appleboy/ssh-action@v1.2.5
uses: appleboy/ssh-action@v1.2.4
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -54,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.2.5
uses: appleboy/ssh-action@v1.2.4
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}

View File

@@ -27,7 +27,6 @@ on:
type: string
jobs:
gallery-agent:
if: github.repository == 'mudler/LocalAI'
runs-on: ubuntu-latest
steps:
- name: Checkout repository

View File

@@ -13,7 +13,6 @@ concurrency:
jobs:
generate_caches:
if: github.repository == 'mudler/LocalAI'
strategy:
matrix:
include:

View File

@@ -12,7 +12,6 @@ concurrency:
jobs:
generate_caches:
if: github.repository == 'mudler/LocalAI'
strategy:
matrix:
include:

View File

@@ -14,7 +14,6 @@
jobs:
hipblas-jobs:
if: github.repository == 'mudler/LocalAI'
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
@@ -51,7 +50,6 @@
ubuntu-codename: 'noble'
core-image-build:
if: github.repository == 'mudler/LocalAI'
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
@@ -138,7 +136,6 @@
ubuntu-codename: 'noble'
gh-runner:
if: github.repository == 'mudler/LocalAI'
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}

View File

@@ -10,8 +10,8 @@ permissions:
actions: write # to dispatch publish workflow
jobs:
dependabot:
if: github.repository == 'mudler/LocalAI' && github.actor == 'localai-bot' && !contains(github.event.pull_request.title, 'chore(model gallery):')
runs-on: ubuntu-latest
if: ${{ github.actor == 'localai-bot' && !contains(github.event.pull_request.title, 'chore(model gallery):') }}
steps:
- name: Checkout repository
uses: actions/checkout@v6

View File

@@ -10,7 +10,7 @@ permissions:
jobs:
notify-discord:
if: github.repository == 'mudler/LocalAI' && (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model'))
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: gemma-3-12b-it-qat
runs-on: ubuntu-latest
@@ -90,7 +90,7 @@ jobs:
connect-timeout-seconds: 180
limit-access-to-actor: true
notify-twitter:
if: github.repository == 'mudler/LocalAI' && (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model'))
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: gemma-3-12b-it-qat
runs-on: ubuntu-latest

View File

@@ -6,7 +6,6 @@ on:
jobs:
notify-discord:
if: github.repository == 'mudler/LocalAI'
runs-on: ubuntu-latest
env:
RELEASE_BODY: ${{ github.event.release.body }}

View File

@@ -8,7 +8,6 @@ on:
jobs:
stale:
if: github.repository == 'mudler/LocalAI'
runs-on: ubuntu-latest
steps:
- uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # v9

View File

@@ -5,7 +5,6 @@ on:
workflow_dispatch:
jobs:
swagger:
if: github.repository == 'mudler/LocalAI'
strategy:
fail-fast: false
runs-on: ubuntu-latest

View File

@@ -1,5 +1,5 @@
# Disable parallel execution for backend builds
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm backends/whisperx backends/ace-step
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm backends/whisperx
GOCMD=go
GOTEST=$(GOCMD) test
@@ -308,7 +308,6 @@ protogen-go-clean:
prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/outetts
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/chatterbox
$(MAKE) -C backend/python/vllm
@@ -320,11 +319,9 @@ prepare-test-extra: protogen-python
$(MAKE) -C backend/python/qwen-asr
$(MAKE) -C backend/python/voxcpm
$(MAKE) -C backend/python/whisperx
$(MAKE) -C backend/python/ace-step
test-extra: prepare-test-extra
$(MAKE) -C backend/python/transformers test
$(MAKE) -C backend/python/outetts test
$(MAKE) -C backend/python/diffusers test
$(MAKE) -C backend/python/chatterbox test
$(MAKE) -C backend/python/vllm test
@@ -336,7 +333,6 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/qwen-asr test
$(MAKE) -C backend/python/voxcpm test
$(MAKE) -C backend/python/whisperx test
$(MAKE) -C backend/python/ace-step test
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
@@ -455,7 +451,6 @@ BACKEND_WHISPER = whisper|golang|.|false|true
# Python backends with root context
BACKEND_RERANKERS = rerankers|python|.|false|true
BACKEND_TRANSFORMERS = transformers|python|.|false|true
BACKEND_OUTETTS = outetts|python|.|false|true
BACKEND_FASTER_WHISPER = faster-whisper|python|.|false|true
BACKEND_COQUI = coqui|python|.|false|true
BACKEND_RFDETR = rfdetr|python|.|false|true
@@ -473,7 +468,6 @@ BACKEND_QWEN_TTS = qwen-tts|python|.|false|true
BACKEND_QWEN_ASR = qwen-asr|python|.|false|true
BACKEND_VOXCPM = voxcpm|python|.|false|true
BACKEND_WHISPERX = whisperx|python|.|false|true
BACKEND_ACE_STEP = ace-step|python|.|false|true
# Helper function to build docker image for a backend
# Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
@@ -505,7 +499,6 @@ $(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_OUTETTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_FASTER_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_COQUI)))
$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR)))
@@ -523,13 +516,12 @@ $(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_QWEN_ASR)))
$(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM)))
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPERX)))
$(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP)))
# Pattern rule for docker-save targets
docker-save-%: backend-images
docker save local-ai-backend:$* -o backend-images/$*.tar
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm docker-build-whisperx docker-build-ace-step
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm docker-build-whisperx
########################################################
### Mock Backend for E2E Tests

View File

@@ -203,8 +203,7 @@ local-ai run oci://localai/phi-2:latest
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html). If you are interested in our roadmap items and future enhancements, see the [Issues labeled as Roadmap here](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
## 📰 Latest project news
- February 2026: [Realtime API for audio-to-audio with tool calling](https://github.com/mudler/LocalAI/pull/6245), [ACE-Step 1.5 support](https://github.com/mudler/LocalAI/pull/8396)
- January 2026: **LocalAI 3.10.0** - Major release with Anthropic API support, Open Responses API for stateful agents, video & image generation suite (LTX-2), unified GPU backends, tool streaming & XML parsing, system-aware backend gallery, crash fixes for AVX-only CPUs and AMD VRAM reporting, request tracing, and new backends: **Moonshine** (ultra-fast transcription), **Pocket-TTS** (lightweight TTS). Vulkan arm64 builds now available. [Release notes](https://github.com/mudler/LocalAI/releases/tag/v3.10.0).
- December 2025: [Dynamic Memory Resource reclaimer](https://github.com/mudler/LocalAI/pull/7583), [Automatic fitting of models to multiple GPUS(llama.cpp)](https://github.com/mudler/LocalAI/pull/7584), [Added Vibevoice backend](https://github.com/mudler/LocalAI/pull/7494)
- November 2025: Major improvements to the UX. Among these: [Import models via URL](https://github.com/mudler/LocalAI/pull/7245) and [Multiple chats and history](https://github.com/mudler/LocalAI/pull/7325)
- October 2025: 🔌 [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/) support added for agentic capabilities with external tools
@@ -270,7 +269,6 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
|---------|-------------|---------------------|
| **whisper.cpp** | OpenAI Whisper in C/C++ | CUDA 12/13, ROCm, Intel SYCL, Vulkan, CPU |
| **faster-whisper** | Fast Whisper with CTranslate2 | CUDA 12/13, ROCm, Intel, CPU |
| **moonshine** | Ultra-fast transcription engine for low-end devices | CUDA 12/13, Metal, CPU |
| **coqui** | Advanced TTS with 1100+ languages | CUDA 12/13, ROCm, Intel, CPU |
| **kokoro** | Lightweight TTS model | CUDA 12/13, ROCm, Intel, CPU |
| **chatterbox** | Production-grade TTS | CUDA 12/13, CPU |
@@ -281,7 +279,6 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
| **vibevoice** | Real-time TTS with voice cloning | CUDA 12/13, ROCm, Intel, CPU |
| **pocket-tts** | Lightweight CPU-based TTS | CUDA 12/13, ROCm, Intel, CPU |
| **qwen-tts** | High-quality TTS with custom voice, voice design, and voice cloning | CUDA 12/13, ROCm, Intel, CPU |
| **ace-step** | Music generation from text descriptions, lyrics, or audio samples | CUDA 12/13, ROCm, Intel, Metal, CPU |
### Image & Video Generation
| Backend | Description | Acceleration Support |
@@ -303,11 +300,11 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
|-------------------|-------------------|------------------|
| **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware |
| **NVIDIA CUDA 13** | All CUDA-compatible backends | Nvidia hardware |
| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, neutts, vibevoice, pocket-tts, qwen-tts, ace-step | AMD Graphics |
| **Intel oneAPI** | llama.cpp, whisper, stablediffusion, vllm, transformers, diffusers, rfdetr, rerankers, coqui, kokoro, vibevoice, pocket-tts, qwen-tts, ace-step | Intel Arc, Intel iGPUs |
| **Apple Metal** | llama.cpp, whisper, diffusers, MLX, MLX-VLM, moonshine, ace-step | Apple M1/M2/M3+ |
| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, neutts, vibevoice, pocket-tts, qwen-tts | AMD Graphics |
| **Intel oneAPI** | llama.cpp, whisper, stablediffusion, vllm, transformers, diffusers, rfdetr, rerankers, coqui, kokoro, vibevoice, pocket-tts, qwen-tts | Intel Arc, Intel iGPUs |
| **Apple Metal** | llama.cpp, whisper, diffusers, MLX, MLX-VLM | Apple M1/M2/M3+ |
| **Vulkan** | llama.cpp, whisper, stablediffusion | Cross-platform GPUs |
| **NVIDIA Jetson (CUDA 12)** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr, ace-step | ARM64 embedded AI (AGX Orin, etc.) |
| **NVIDIA Jetson (CUDA 12)** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr | ARM64 embedded AI (AGX Orin, etc.) |
| **NVIDIA Jetson (CUDA 13)** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr | ARM64 embedded AI (DGX Spark) |
| **CPU Optimized** | All backends | AVX/AVX2/AVX512, quantization support |

View File

@@ -365,14 +365,6 @@ message SoundGenerationRequest {
optional bool sample = 6;
optional string src = 7;
optional int32 src_divisor = 8;
optional bool think = 9;
optional string caption = 10;
optional string lyrics = 11;
optional int32 bpm = 12;
optional string keyscale = 13;
optional string language = 14;
optional string timesignature = 15;
optional bool instrumental = 17;
}
message TokenizationResponse {

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=22cae832188a1f08d18bd0a707a4ba5cd03c7349
LLAMA_VERSION?=2634ed207a17db1a54bd8df0555bd8499a6ab691
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -6,7 +6,4 @@ huggingface:
package:
bash package.sh
build: huggingface package
clean:
rm -f huggingface
build: huggingface package

View File

@@ -8,5 +8,5 @@ set -e
CURDIR=$(dirname "$(realpath $0)")
mkdir -p $CURDIR/package
cp -avf $CURDIR/huggingface $CURDIR/package/
cp -avrf $CURDIR/huggingface $CURDIR/package/
cp -rfv $CURDIR/run.sh $CURDIR/package/

View File

@@ -6,7 +6,4 @@ local-store:
package:
bash package.sh
build: local-store package
clean:
rm -f local-store
build: local-store package

View File

@@ -8,5 +8,5 @@ set -e
CURDIR=$(dirname "$(realpath $0)")
mkdir -p $CURDIR/package
cp -avf $CURDIR/local-store $CURDIR/package/
cp -avrf $CURDIR/local-store $CURDIR/package/
cp -rfv $CURDIR/run.sh $CURDIR/package/

View File

@@ -34,7 +34,4 @@ piper: sources/go-piper sources/go-piper/libpiper_binding.a espeak-ng-data
package:
bash package.sh
build: piper package
clean:
rm -f piper
build: piper package

View File

@@ -10,8 +10,8 @@ CURDIR=$(dirname "$(realpath $0)")
# Create lib directory
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/piper $CURDIR/package/
cp -avf $CURDIR/espeak-ng-data $CURDIR/package/
cp -avrf $CURDIR/piper $CURDIR/package/
cp -avrf $CURDIR/espeak-ng-data $CURDIR/package/
cp -rfv $CURDIR/run.sh $CURDIR/package/
cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/

View File

@@ -44,7 +44,4 @@ silero-vad: backend-assets/lib/libonnxruntime.so.1
package:
bash package.sh
build: silero-vad package
clean:
rm -f silero-vad
build: silero-vad package

View File

@@ -10,8 +10,8 @@ CURDIR=$(dirname "$(realpath $0)")
# Create lib directory
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/silero-vad $CURDIR/package/
cp -avf $CURDIR/run.sh $CURDIR/package/
cp -avrf $CURDIR/silero-vad $CURDIR/package/
cp -avrf $CURDIR/run.sh $CURDIR/package/
cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
# Detect architecture and copy appropriate libraries

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=941bdabbe4561bc6de68981aea01bc5ab05781c5
WHISPER_CPP_VERSION?=aa1bc0d1a6dfd70dbb9f60c11df12441e03a9075
SO_TARGET?=libgowhisper.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
@@ -78,7 +78,7 @@ package: whisper
build: package
clean: purge
rm -rf libgowhisper*.so package sources/whisper.cpp whisper
rm -rf libgowhisper*.so sources/whisper.cpp whisper
purge:
rm -rf build*

View File

@@ -105,7 +105,6 @@
intel: "intel-rfdetr"
#amd: "rocm-rfdetr"
nvidia-l4t: "nvidia-l4t-arm64-rfdetr"
metal: "metal-rfdetr"
default: "cpu-rfdetr"
nvidia-cuda-13: "cuda13-rfdetr"
nvidia-cuda-12: "cuda12-rfdetr"
@@ -183,15 +182,6 @@
- text-to-text
- LLM
- MLX
capabilities:
default: "cpu-mlx"
nvidia: "cuda12-mlx"
metal: "metal-mlx"
nvidia-cuda-12: "cuda12-mlx"
nvidia-cuda-13: "cuda13-mlx"
nvidia-l4t: "nvidia-l4t-mlx"
nvidia-l4t-cuda-12: "nvidia-l4t-mlx"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-mlx"
- &mlx-vlm
name: "mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx-vlm"
@@ -209,15 +199,6 @@
- vision-language
- LLM
- MLX
capabilities:
default: "cpu-mlx-vlm"
nvidia: "cuda12-mlx-vlm"
metal: "metal-mlx-vlm"
nvidia-cuda-12: "cuda12-mlx-vlm"
nvidia-cuda-13: "cuda13-mlx-vlm"
nvidia-l4t: "nvidia-l4t-mlx-vlm"
nvidia-l4t-cuda-12: "nvidia-l4t-mlx-vlm"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-mlx-vlm"
- &mlx-audio
name: "mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx-audio"
@@ -235,15 +216,6 @@
- text-to-audio
- LLM
- MLX
capabilities:
default: "cpu-mlx-audio"
nvidia: "cuda12-mlx-audio"
metal: "metal-mlx-audio"
nvidia-cuda-12: "cuda12-mlx-audio"
nvidia-cuda-13: "cuda13-mlx-audio"
nvidia-l4t: "nvidia-l4t-mlx-audio"
nvidia-l4t-cuda-12: "nvidia-l4t-mlx-audio"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-mlx-audio"
- &rerankers
name: "rerankers"
alias: "rerankers"
@@ -251,7 +223,6 @@
nvidia: "cuda12-rerankers"
intel: "intel-rerankers"
amd: "rocm-rerankers"
metal: "metal-rerankers"
- &transformers
name: "transformers"
icon: https://avatars.githubusercontent.com/u/25720743?s=200&v=4
@@ -269,7 +240,6 @@
nvidia: "cuda12-transformers"
intel: "intel-transformers"
amd: "rocm-transformers"
metal: "metal-transformers"
nvidia-cuda-13: "cuda13-transformers"
nvidia-cuda-12: "cuda12-transformers"
- &diffusers
@@ -296,34 +266,6 @@
nvidia-cuda-12: "cuda12-diffusers"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-diffusers"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-diffusers"
- &ace-step
name: "ace-step"
description: |
ACE-Step 1.5 is an open-source music generation model. It supports simple mode (natural language description) and advanced mode (caption, lyrics, think, bpm, keyscale, etc.). Uses in-process acestep (LLMHandler for metadata, DiT for audio).
urls:
- https://github.com/ace-step/ACE-Step-1.5
tags:
- music-generation
- sound-generation
alias: "ace-step"
capabilities:
nvidia: "cuda12-ace-step"
intel: "intel-ace-step"
amd: "rocm-ace-step"
metal: "metal-ace-step"
default: "cpu-ace-step"
nvidia-cuda-13: "cuda13-ace-step"
nvidia-cuda-12: "cuda12-ace-step"
- !!merge <<: *ace-step
name: "ace-step-development"
capabilities:
nvidia: "cuda12-ace-step-development"
intel: "intel-ace-step-development"
amd: "rocm-ace-step-development"
metal: "metal-ace-step-development"
default: "cpu-ace-step-development"
nvidia-cuda-13: "cuda13-ace-step-development"
nvidia-cuda-12: "cuda12-ace-step-development"
- &faster-whisper
icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
description: |
@@ -340,7 +282,6 @@
nvidia: "cuda12-faster-whisper"
intel: "intel-faster-whisper"
amd: "rocm-faster-whisper"
metal: "metal-faster-whisper"
nvidia-cuda-13: "cuda13-faster-whisper"
nvidia-cuda-12: "cuda12-faster-whisper"
- &moonshine
@@ -358,7 +299,6 @@
alias: "moonshine"
capabilities:
nvidia: "cuda12-moonshine"
metal: "metal-moonshine"
default: "cpu-moonshine"
nvidia-cuda-13: "cuda13-moonshine"
nvidia-cuda-12: "cuda12-moonshine"
@@ -378,7 +318,6 @@
capabilities:
nvidia: "cuda12-whisperx"
amd: "rocm-whisperx"
metal: "metal-whisperx"
default: "cpu-whisperx"
nvidia-cuda-13: "cuda13-whisperx"
nvidia-cuda-12: "cuda12-whisperx"
@@ -401,7 +340,6 @@
intel: "intel-kokoro"
amd: "rocm-kokoro"
nvidia-l4t: "nvidia-l4t-kokoro"
metal: "metal-kokoro"
nvidia-cuda-13: "cuda13-kokoro"
nvidia-cuda-12: "cuda12-kokoro"
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-kokoro"
@@ -426,25 +364,9 @@
nvidia: "cuda12-coqui"
intel: "intel-coqui"
amd: "rocm-coqui"
metal: "metal-coqui"
nvidia-cuda-13: "cuda13-coqui"
nvidia-cuda-12: "cuda12-coqui"
icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
- &outetts
urls:
- https://github.com/OuteAI/outetts
description: |
OuteTTS is an open-weight text-to-speech model from OuteAI (OuteAI/OuteTTS-0.3-1B).
Supports custom speaker voices via audio path or default speakers.
tags:
- text-to-speech
- TTS
license: apache-2.0
name: "outetts"
alias: "outetts"
capabilities:
default: "cpu-outetts"
nvidia-cuda-12: "cuda12-outetts"
- &chatterbox
urls:
- https://github.com/resemble-ai/chatterbox
@@ -483,7 +405,6 @@
intel: "intel-vibevoice"
amd: "rocm-vibevoice"
nvidia-l4t: "nvidia-l4t-vibevoice"
metal: "metal-vibevoice"
default: "cpu-vibevoice"
nvidia-cuda-13: "cuda13-vibevoice"
nvidia-cuda-12: "cuda12-vibevoice"
@@ -506,7 +427,6 @@
intel: "intel-qwen-tts"
amd: "rocm-qwen-tts"
nvidia-l4t: "nvidia-l4t-qwen-tts"
metal: "metal-qwen-tts"
default: "cpu-qwen-tts"
nvidia-cuda-13: "cuda13-qwen-tts"
nvidia-cuda-12: "cuda12-qwen-tts"
@@ -529,7 +449,6 @@
intel: "intel-qwen-asr"
amd: "rocm-qwen-asr"
nvidia-l4t: "nvidia-l4t-qwen-asr"
metal: "metal-qwen-asr"
default: "cpu-qwen-asr"
nvidia-cuda-13: "cuda13-qwen-asr"
nvidia-cuda-12: "cuda12-qwen-asr"
@@ -551,7 +470,6 @@
nvidia: "cuda12-voxcpm"
intel: "intel-voxcpm"
amd: "rocm-voxcpm"
metal: "metal-voxcpm"
default: "cpu-voxcpm"
nvidia-cuda-13: "cuda13-voxcpm"
nvidia-cuda-12: "cuda12-voxcpm"
@@ -572,7 +490,6 @@
intel: "intel-pocket-tts"
amd: "rocm-pocket-tts"
nvidia-l4t: "nvidia-l4t-pocket-tts"
metal: "metal-pocket-tts"
default: "cpu-pocket-tts"
nvidia-cuda-13: "cuda13-pocket-tts"
nvidia-cuda-12: "cuda12-pocket-tts"
@@ -733,234 +650,31 @@
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx-audio"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-mlx-audio
## mlx
- !!merge <<: *mlx
name: "cpu-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-mlx"
mirrors:
- localai/localai-backends:latest-cpu-mlx
- !!merge <<: *mlx
name: "cpu-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-mlx"
mirrors:
- localai/localai-backends:master-cpu-mlx
- !!merge <<: *mlx
name: "cuda12-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-mlx"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-mlx
- !!merge <<: *mlx
name: "cuda12-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-mlx"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-mlx
- !!merge <<: *mlx
name: "cuda13-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-mlx"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-mlx
- !!merge <<: *mlx
name: "cuda13-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-mlx"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-mlx
- !!merge <<: *mlx
name: "nvidia-l4t-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-mlx"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-mlx
- !!merge <<: *mlx
name: "nvidia-l4t-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-mlx"
mirrors:
- localai/localai-backends:master-nvidia-l4t-mlx
- !!merge <<: *mlx
name: "cuda13-nvidia-l4t-arm64-mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx
- !!merge <<: *mlx
name: "cuda13-nvidia-l4t-arm64-mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-mlx"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-mlx
## mlx-vlm
- !!merge <<: *mlx-vlm
name: "cpu-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-mlx-vlm"
mirrors:
- localai/localai-backends:latest-cpu-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cpu-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-mlx-vlm"
mirrors:
- localai/localai-backends:master-cpu-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda12-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-mlx-vlm"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda12-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-mlx-vlm"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda13-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-mlx-vlm"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda13-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-mlx-vlm"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-mlx-vlm
- !!merge <<: *mlx-vlm
name: "nvidia-l4t-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-mlx-vlm"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-mlx-vlm
- !!merge <<: *mlx-vlm
name: "nvidia-l4t-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-mlx-vlm"
mirrors:
- localai/localai-backends:master-nvidia-l4t-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda13-nvidia-l4t-arm64-mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx-vlm"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx-vlm
- !!merge <<: *mlx-vlm
name: "cuda13-nvidia-l4t-arm64-mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-mlx-vlm"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-mlx-vlm
## mlx-audio
- !!merge <<: *mlx-audio
name: "cpu-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-mlx-audio"
mirrors:
- localai/localai-backends:latest-cpu-mlx-audio
- !!merge <<: *mlx-audio
name: "cpu-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-mlx-audio"
mirrors:
- localai/localai-backends:master-cpu-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda12-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-mlx-audio"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda12-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-mlx-audio"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda13-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-mlx-audio"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda13-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-mlx-audio"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-mlx-audio
- !!merge <<: *mlx-audio
name: "nvidia-l4t-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-mlx-audio"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-mlx-audio
- !!merge <<: *mlx-audio
name: "nvidia-l4t-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-mlx-audio"
mirrors:
- localai/localai-backends:master-nvidia-l4t-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda13-nvidia-l4t-arm64-mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx-audio"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-mlx-audio
- !!merge <<: *mlx-audio
name: "cuda13-nvidia-l4t-arm64-mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-mlx-audio"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-mlx-audio
- !!merge <<: *kitten-tts
name: "kitten-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-kitten-tts"
mirrors:
- localai/localai-backends:master-kitten-tts
- !!merge <<: *kitten-tts
name: "metal-kitten-tts"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-kitten-tts"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-kitten-tts
- !!merge <<: *kitten-tts
name: "metal-kitten-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kitten-tts"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-kitten-tts
- !!merge <<: *huggingface
name: "huggingface-development"
uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
mirrors:
- localai/localai-backends:master-huggingface
- !!merge <<: *huggingface
name: "metal-huggingface"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-huggingface"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-huggingface
- !!merge <<: *huggingface
name: "metal-huggingface-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-huggingface"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-huggingface
- !!merge <<: *local-store
name: "local-store-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
mirrors:
- localai/localai-backends:master-cpu-local-store
- !!merge <<: *local-store
name: "metal-local-store"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-local-store"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-local-store
- !!merge <<: *local-store
name: "metal-local-store-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-local-store"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-local-store
- !!merge <<: *silero-vad
name: "silero-vad-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
mirrors:
- localai/localai-backends:master-cpu-silero-vad
- !!merge <<: *silero-vad
name: "metal-silero-vad"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-silero-vad"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-silero-vad
- !!merge <<: *silero-vad
name: "metal-silero-vad-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-silero-vad"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-silero-vad
- !!merge <<: *piper
name: "piper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-piper"
mirrors:
- localai/localai-backends:master-piper
- !!merge <<: *piper
name: "metal-piper"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-piper"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-piper
- !!merge <<: *piper
name: "metal-piper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-piper"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-piper
## llama-cpp
- !!merge <<: *llamacpp
name: "nvidia-l4t-arm64-llama-cpp"
@@ -1334,7 +1048,6 @@
intel: "intel-rfdetr-development"
#amd: "rocm-rfdetr-development"
nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development"
metal: "metal-rfdetr-development"
default: "cpu-rfdetr-development"
nvidia-cuda-13: "cuda13-rfdetr-development"
- !!merge <<: *rfdetr
@@ -1402,16 +1115,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rfdetr"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-rfdetr
- !!merge <<: *rfdetr
name: "metal-rfdetr"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-rfdetr"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-rfdetr
- !!merge <<: *rfdetr
name: "metal-rfdetr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rfdetr"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-rfdetr
## Rerankers
- !!merge <<: *rerankers
name: "rerankers-development"
@@ -1419,7 +1122,6 @@
nvidia: "cuda12-rerankers-development"
intel: "intel-rerankers-development"
amd: "rocm-rerankers-development"
metal: "metal-rerankers-development"
nvidia-cuda-13: "cuda13-rerankers-development"
- !!merge <<: *rerankers
name: "cuda12-rerankers"
@@ -1461,16 +1163,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-rerankers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-rerankers
- !!merge <<: *rerankers
name: "metal-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-rerankers"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-rerankers
- !!merge <<: *rerankers
name: "metal-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rerankers"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-rerankers
## Transformers
- !!merge <<: *transformers
name: "transformers-development"
@@ -1478,7 +1170,6 @@
nvidia: "cuda12-transformers-development"
intel: "intel-transformers-development"
amd: "rocm-transformers-development"
metal: "metal-transformers-development"
nvidia-cuda-13: "cuda13-transformers-development"
- !!merge <<: *transformers
name: "cuda12-transformers"
@@ -1520,16 +1211,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-transformers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-transformers
- !!merge <<: *transformers
name: "metal-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-transformers"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-transformers
- !!merge <<: *transformers
name: "metal-transformers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-transformers"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-transformers
## Diffusers
- !!merge <<: *diffusers
name: "diffusers-development"
@@ -1621,67 +1302,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-diffusers"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-diffusers
## ace-step
- !!merge <<: *ace-step
name: "cpu-ace-step"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-ace-step"
mirrors:
- localai/localai-backends:latest-cpu-ace-step
- !!merge <<: *ace-step
name: "cpu-ace-step-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-ace-step"
mirrors:
- localai/localai-backends:master-cpu-ace-step
- !!merge <<: *ace-step
name: "cuda12-ace-step"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-ace-step"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-ace-step
- !!merge <<: *ace-step
name: "cuda12-ace-step-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-ace-step"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-ace-step
- !!merge <<: *ace-step
name: "cuda13-ace-step"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-ace-step"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-13-ace-step
- !!merge <<: *ace-step
name: "cuda13-ace-step-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-ace-step"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-ace-step
- !!merge <<: *ace-step
name: "rocm-ace-step"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-ace-step"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-ace-step
- !!merge <<: *ace-step
name: "rocm-ace-step-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-ace-step"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-ace-step
- !!merge <<: *ace-step
name: "intel-ace-step"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-ace-step"
mirrors:
- localai/localai-backends:latest-gpu-intel-ace-step
- !!merge <<: *ace-step
name: "intel-ace-step-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-ace-step"
mirrors:
- localai/localai-backends:master-gpu-intel-ace-step
- !!merge <<: *ace-step
name: "metal-ace-step"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-ace-step"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-ace-step
- !!merge <<: *ace-step
name: "metal-ace-step-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-ace-step"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-ace-step
## kokoro
- !!merge <<: *kokoro
name: "kokoro-development"
@@ -1690,7 +1310,6 @@
intel: "intel-kokoro-development"
amd: "rocm-kokoro-development"
nvidia-l4t: "nvidia-l4t-kokoro-development"
metal: "metal-kokoro-development"
- !!merge <<: *kokoro
name: "cuda12-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro"
@@ -1741,16 +1360,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-kokoro"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-kokoro
- !!merge <<: *kokoro
name: "metal-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-kokoro"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-kokoro
- !!merge <<: *kokoro
name: "metal-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kokoro"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-kokoro
## faster-whisper
- !!merge <<: *faster-whisper
name: "faster-whisper-development"
@@ -1758,7 +1367,6 @@
nvidia: "cuda12-faster-whisper-development"
intel: "intel-faster-whisper-development"
amd: "rocm-faster-whisper-development"
metal: "metal-faster-whisper-development"
nvidia-cuda-13: "cuda13-faster-whisper-development"
- !!merge <<: *faster-whisper
name: "cuda12-faster-whisper-development"
@@ -1790,16 +1398,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-faster-whisper"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-faster-whisper
- !!merge <<: *faster-whisper
name: "metal-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-faster-whisper"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-faster-whisper
- !!merge <<: *faster-whisper
name: "metal-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-faster-whisper"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-faster-whisper
## moonshine
- !!merge <<: *moonshine
name: "moonshine-development"
@@ -1838,23 +1436,12 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-moonshine"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-moonshine
- !!merge <<: *moonshine
name: "metal-moonshine"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-moonshine"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-moonshine
- !!merge <<: *moonshine
name: "metal-moonshine-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-moonshine"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-moonshine
## whisperx
- !!merge <<: *whisperx
name: "whisperx-development"
capabilities:
nvidia: "cuda12-whisperx-development"
amd: "rocm-whisperx-development"
metal: "metal-whisperx-development"
default: "cpu-whisperx-development"
nvidia-cuda-13: "cuda13-whisperx-development"
nvidia-cuda-12: "cuda12-whisperx-development"
@@ -1898,16 +1485,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-whisperx"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-whisperx
- !!merge <<: *whisperx
name: "metal-whisperx"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisperx"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-whisperx
- !!merge <<: *whisperx
name: "metal-whisperx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisperx"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-whisperx
## coqui
- !!merge <<: *coqui
@@ -1916,7 +1493,6 @@
nvidia: "cuda12-coqui-development"
intel: "intel-coqui-development"
amd: "rocm-coqui-development"
metal: "metal-coqui-development"
- !!merge <<: *coqui
name: "cuda12-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui"
@@ -1947,42 +1523,6 @@
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-coqui
- !!merge <<: *coqui
name: "metal-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-coqui"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-coqui
- !!merge <<: *coqui
name: "metal-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-coqui"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-coqui
## outetts
- !!merge <<: *outetts
name: "outetts-development"
capabilities:
default: "cpu-outetts-development"
nvidia-cuda-12: "cuda12-outetts-development"
- !!merge <<: *outetts
name: "cpu-outetts"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-outetts"
mirrors:
- localai/localai-backends:latest-cpu-outetts
- !!merge <<: *outetts
name: "cpu-outetts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-outetts"
mirrors:
- localai/localai-backends:master-cpu-outetts
- !!merge <<: *outetts
name: "cuda12-outetts"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-outetts"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-outetts
- !!merge <<: *outetts
name: "cuda12-outetts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-outetts"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-outetts
## chatterbox
- !!merge <<: *chatterbox
name: "chatterbox-development"
@@ -2063,7 +1603,6 @@
intel: "intel-vibevoice-development"
amd: "rocm-vibevoice-development"
nvidia-l4t: "nvidia-l4t-vibevoice-development"
metal: "metal-vibevoice-development"
default: "cpu-vibevoice-development"
nvidia-cuda-13: "cuda13-vibevoice-development"
nvidia-cuda-12: "cuda12-vibevoice-development"
@@ -2139,16 +1678,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice
- !!merge <<: *vibevoice
name: "metal-vibevoice"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-vibevoice"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-vibevoice
- !!merge <<: *vibevoice
name: "metal-vibevoice-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-vibevoice"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-vibevoice
## qwen-tts
- !!merge <<: *qwen-tts
name: "qwen-tts-development"
@@ -2157,7 +1686,6 @@
intel: "intel-qwen-tts-development"
amd: "rocm-qwen-tts-development"
nvidia-l4t: "nvidia-l4t-qwen-tts-development"
metal: "metal-qwen-tts-development"
default: "cpu-qwen-tts-development"
nvidia-cuda-13: "cuda13-qwen-tts-development"
nvidia-cuda-12: "cuda12-qwen-tts-development"
@@ -2233,16 +1761,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-tts"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-tts
- !!merge <<: *qwen-tts
name: "metal-qwen-tts"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-qwen-tts"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-qwen-tts
- !!merge <<: *qwen-tts
name: "metal-qwen-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen-tts"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-qwen-tts
## qwen-asr
- !!merge <<: *qwen-asr
name: "qwen-asr-development"
@@ -2251,7 +1769,6 @@
intel: "intel-qwen-asr-development"
amd: "rocm-qwen-asr-development"
nvidia-l4t: "nvidia-l4t-qwen-asr-development"
metal: "metal-qwen-asr-development"
default: "cpu-qwen-asr-development"
nvidia-cuda-13: "cuda13-qwen-asr-development"
nvidia-cuda-12: "cuda12-qwen-asr-development"
@@ -2327,16 +1844,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-asr"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-qwen-asr
- !!merge <<: *qwen-asr
name: "metal-qwen-asr"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-qwen-asr"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-qwen-asr
- !!merge <<: *qwen-asr
name: "metal-qwen-asr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-qwen-asr"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-qwen-asr
## voxcpm
- !!merge <<: *voxcpm
name: "voxcpm-development"
@@ -2344,7 +1851,6 @@
nvidia: "cuda12-voxcpm-development"
intel: "intel-voxcpm-development"
amd: "rocm-voxcpm-development"
metal: "metal-voxcpm-development"
default: "cpu-voxcpm-development"
nvidia-cuda-13: "cuda13-voxcpm-development"
nvidia-cuda-12: "cuda12-voxcpm-development"
@@ -2398,16 +1904,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-voxcpm"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-voxcpm
- !!merge <<: *voxcpm
name: "metal-voxcpm"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-voxcpm"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-voxcpm
- !!merge <<: *voxcpm
name: "metal-voxcpm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-voxcpm"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-voxcpm
## pocket-tts
- !!merge <<: *pocket-tts
name: "pocket-tts-development"
@@ -2416,7 +1912,6 @@
intel: "intel-pocket-tts-development"
amd: "rocm-pocket-tts-development"
nvidia-l4t: "nvidia-l4t-pocket-tts-development"
metal: "metal-pocket-tts-development"
default: "cpu-pocket-tts-development"
nvidia-cuda-13: "cuda13-pocket-tts-development"
nvidia-cuda-12: "cuda12-pocket-tts-development"
@@ -2492,13 +1987,3 @@
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-pocket-tts"
mirrors:
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-pocket-tts
- !!merge <<: *pocket-tts
name: "metal-pocket-tts"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-pocket-tts"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-pocket-tts
- !!merge <<: *pocket-tts
name: "metal-pocket-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-pocket-tts"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-pocket-tts

View File

@@ -1,16 +0,0 @@
.DEFAULT_GOAL := install
.PHONY: install
install:
bash install.sh
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__
test: install
bash test.sh

View File

@@ -1,472 +0,0 @@
#!/usr/bin/env python3
"""
LocalAI ACE-Step Backend
gRPC backend for ACE-Step 1.5 music generation. Aligns with upstream acestep API:
- LoadModel: initializes AceStepHandler (DiT) and LLMHandler, parses Options.
- SoundGeneration: uses create_sample (simple mode), format_sample (optional), then
generate_music from acestep.inference. Writes first output to request.dst.
- Fail hard: no fallback WAV on error; exceptions propagate to gRPC.
"""
from concurrent import futures
import argparse
import shutil
import signal
import sys
import os
import tempfile
import backend_pb2
import backend_pb2_grpc
import grpc
from acestep.inference import (
GenerationParams,
GenerationConfig,
generate_music,
create_sample,
format_sample,
)
from acestep.handler import AceStepHandler
from acestep.llm_inference import LLMHandler
from acestep.model_downloader import ensure_lm_model
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
MAX_WORKERS = int(os.environ.get("PYTHON_GRPC_MAX_WORKERS", "1"))
# Model name -> HuggingFace/ModelScope repo (from upstream api_server.py)
MODEL_REPO_MAPPING = {
"acestep-v15-turbo": "ACE-Step/Ace-Step1.5",
"acestep-5Hz-lm-0.6B": "ACE-Step/Ace-Step1.5",
"acestep-5Hz-lm-1.7B": "ACE-Step/Ace-Step1.5",
"vae": "ACE-Step/Ace-Step1.5",
"Qwen3-Embedding-0.6B": "ACE-Step/Ace-Step1.5",
"acestep-v15-base": "ACE-Step/acestep-v15-base",
"acestep-v15-sft": "ACE-Step/acestep-v15-sft",
"acestep-v15-turbo-shift3": "ACE-Step/acestep-v15-turbo-shift3",
"acestep-5Hz-lm-4B": "ACE-Step/acestep-5Hz-lm-4B",
}
DEFAULT_REPO_ID = "ACE-Step/Ace-Step1.5"
def _is_float(s):
try:
float(s)
return True
except (ValueError, TypeError):
return False
def _is_int(s):
try:
int(s)
return True
except (ValueError, TypeError):
return False
def _parse_timesteps(s):
if s is None or (isinstance(s, str) and not s.strip()):
return None
if isinstance(s, (list, tuple)):
return [float(x) for x in s]
try:
return [float(x.strip()) for x in str(s).split(",") if x.strip()]
except (ValueError, TypeError):
return None
def _parse_options(opts_list):
"""Parse repeated 'key:value' options into a dict. Coerce numeric and bool."""
out = {}
for opt in opts_list or []:
if ":" not in opt:
continue
key, value = opt.split(":", 1)
key = key.strip()
value = value.strip()
if _is_int(value):
out[key] = int(value)
elif _is_float(value):
out[key] = float(value)
elif value.lower() in ("true", "false"):
out[key] = value.lower() == "true"
else:
out[key] = value
return out
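# Illustrative parse (hypothetical options): ["temperature:0.85", "init_lm:true", "lm_backend:vllm"]
# -> {"temperature": 0.85, "init_lm": True, "lm_backend": "vllm"}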
def _generate_audio_sync(servicer, payload, dst_path):
"""
Run full ACE-Step pipeline using acestep.inference:
- If sample_mode/sample_query: create_sample() for caption/lyrics/metadata.
- If use_format and caption/lyrics: format_sample().
- Build GenerationParams and GenerationConfig, then generate_music().
Writes the first generated audio to dst_path. Raises on failure.
"""
opts = servicer.options
dit_handler = servicer.dit_handler
llm_handler = servicer.llm_handler
for key, value in opts.items():
if key not in payload:
payload[key] = value
def _opt(name, default):
return opts.get(name, default)
lm_temperature = _opt("temperature", 0.85)
lm_cfg_scale = _opt("lm_cfg_scale", _opt("cfg_scale", 2.0))
lm_top_k = opts.get("top_k")
lm_top_p = _opt("top_p", 0.9)
if lm_top_p is not None and lm_top_p >= 1.0:
lm_top_p = None
inference_steps = _opt("inference_steps", 8)
guidance_scale = _opt("guidance_scale", 7.0)
batch_size = max(1, int(_opt("batch_size", 1)))
use_simple = bool(payload.get("sample_query") or payload.get("text"))
sample_mode = use_simple and (payload.get("thinking") or payload.get("sample_mode"))
sample_query = (payload.get("sample_query") or payload.get("text") or "").strip()
use_format = bool(payload.get("use_format"))
caption = (payload.get("prompt") or payload.get("caption") or "").strip()
lyrics = (payload.get("lyrics") or "").strip()
vocal_language = (payload.get("vocal_language") or "en").strip()
instrumental = bool(payload.get("instrumental"))
bpm = payload.get("bpm")
key_scale = (payload.get("key_scale") or "").strip()
time_signature = (payload.get("time_signature") or "").strip()
audio_duration = payload.get("audio_duration")
if audio_duration is not None:
try:
audio_duration = float(audio_duration)
except (TypeError, ValueError):
audio_duration = None
if sample_mode and llm_handler and getattr(llm_handler, "llm_initialized", False):
parsed_language = None
if sample_query:
for hint in ("english", "en", "chinese", "zh", "japanese", "ja"):
if hint in sample_query.lower():
parsed_language = "en" if hint == "english" or hint == "en" else hint
break
vocal_lang = vocal_language if vocal_language and vocal_language != "unknown" else parsed_language
sample_result = create_sample(
llm_handler=llm_handler,
query=sample_query or "NO USER INPUT",
instrumental=instrumental,
vocal_language=vocal_lang,
temperature=lm_temperature,
top_k=lm_top_k,
top_p=lm_top_p,
use_constrained_decoding=True,
)
if not sample_result.success:
raise RuntimeError(f"create_sample failed: {sample_result.error or sample_result.status_message}")
caption = sample_result.caption or caption
lyrics = sample_result.lyrics or lyrics
bpm = sample_result.bpm
key_scale = sample_result.keyscale or key_scale
time_signature = sample_result.timesignature or time_signature
if sample_result.duration is not None:
audio_duration = sample_result.duration
if getattr(sample_result, "language", None):
vocal_language = sample_result.language
if use_format and (caption or lyrics) and llm_handler and getattr(llm_handler, "llm_initialized", False):
user_metadata = {}
if bpm is not None:
user_metadata["bpm"] = bpm
if audio_duration is not None and float(audio_duration) > 0:
user_metadata["duration"] = int(audio_duration)
if key_scale:
user_metadata["keyscale"] = key_scale
if time_signature:
user_metadata["timesignature"] = time_signature
if vocal_language and vocal_language != "unknown":
user_metadata["language"] = vocal_language
format_result = format_sample(
llm_handler=llm_handler,
caption=caption,
lyrics=lyrics,
user_metadata=user_metadata if user_metadata else None,
temperature=lm_temperature,
top_k=lm_top_k,
top_p=lm_top_p,
use_constrained_decoding=True,
)
if format_result.success:
caption = format_result.caption or caption
lyrics = format_result.lyrics or lyrics
if format_result.duration is not None:
audio_duration = format_result.duration
if format_result.bpm is not None:
bpm = format_result.bpm
if format_result.keyscale:
key_scale = format_result.keyscale
if format_result.timesignature:
time_signature = format_result.timesignature
if getattr(format_result, "language", None):
vocal_language = format_result.language
thinking = bool(payload.get("thinking"))
use_cot_metas = not sample_mode
params = GenerationParams(
task_type=payload.get("task_type", "text2music"),
instruction=payload.get("instruction", "Fill the audio semantic mask based on the given conditions:"),
reference_audio=payload.get("reference_audio_path"),
src_audio=payload.get("src_audio_path"),
audio_codes=payload.get("audio_code_string", ""),
caption=caption,
lyrics=lyrics,
instrumental=instrumental or (not lyrics or str(lyrics).strip().lower() in ("[inst]", "[instrumental]")),
vocal_language=vocal_language or "unknown",
bpm=bpm,
keyscale=key_scale,
timesignature=time_signature,
duration=float(audio_duration) if audio_duration and float(audio_duration) > 0 else -1.0,
inference_steps=inference_steps,
seed=int(payload.get("seed", -1)),
guidance_scale=guidance_scale,
use_adg=bool(payload.get("use_adg")),
cfg_interval_start=float(payload.get("cfg_interval_start", 0.0)),
cfg_interval_end=float(payload.get("cfg_interval_end", 1.0)),
shift=float(payload.get("shift", 1.0)),
infer_method=(payload.get("infer_method") or "ode").strip(),
timesteps=_parse_timesteps(payload.get("timesteps")),
repainting_start=float(payload.get("repainting_start", 0.0)),
repainting_end=float(payload.get("repainting_end", -1)) if payload.get("repainting_end") is not None else -1,
audio_cover_strength=float(payload.get("audio_cover_strength", 1.0)),
thinking=thinking,
lm_temperature=lm_temperature,
lm_cfg_scale=lm_cfg_scale,
lm_top_k=lm_top_k or 0,
lm_top_p=lm_top_p if lm_top_p is not None and lm_top_p < 1.0 else 0.9,
lm_negative_prompt=payload.get("lm_negative_prompt", "NO USER INPUT"),
use_cot_metas=use_cot_metas,
use_cot_caption=bool(payload.get("use_cot_caption", True)),
use_cot_language=bool(payload.get("use_cot_language", True)),
use_constrained_decoding=True,
)
config = GenerationConfig(
batch_size=batch_size,
allow_lm_batch=bool(payload.get("allow_lm_batch", False)),
use_random_seed=bool(payload.get("use_random_seed", True)),
seeds=payload.get("seeds"),
lm_batch_chunk_size=max(1, int(payload.get("lm_batch_chunk_size", 8))),
constrained_decoding_debug=bool(payload.get("constrained_decoding_debug")),
audio_format=(payload.get("audio_format") or "flac").strip() or "flac",
)
save_dir = tempfile.mkdtemp(prefix="ace_step_")
try:
result = generate_music(
dit_handler=dit_handler,
llm_handler=llm_handler if (llm_handler and getattr(llm_handler, "llm_initialized", False)) else None,
params=params,
config=config,
save_dir=save_dir,
progress=None,
)
if not result.success:
raise RuntimeError(result.error or result.status_message or "generate_music failed")
audios = result.audios or []
if not audios:
raise RuntimeError("generate_music returned no audio")
first_path = audios[0].get("path") or ""
if not first_path or not os.path.isfile(first_path):
raise RuntimeError("first generated audio path missing or not a file")
shutil.copy2(first_path, dst_path)
finally:
try:
shutil.rmtree(save_dir, ignore_errors=True)
except Exception:
pass
class BackendServicer(backend_pb2_grpc.BackendServicer):
def __init__(self):
self.model_path = None
self.model_dir = None
self.checkpoint_dir = None
self.project_root = None
self.options = {}
self.dit_handler = None
self.llm_handler = None
def Health(self, request, context):
return backend_pb2.Reply(message=b"OK")
def LoadModel(self, request, context):
try:
self.options = _parse_options(list(getattr(request, "Options", []) or []))
model_path = getattr(request, "ModelPath", None) or ""
model_name = (request.Model or "").strip()
model_file = (getattr(request, "ModelFile", None) or "").strip()
# Model dir: where we store checkpoints (always under LocalAI models path, never backend dir)
if model_path and model_name:
model_dir = os.path.join(model_path, model_name)
elif model_file:
model_dir = model_file
else:
model_dir = os.path.abspath(model_name or ".")
self.model_dir = model_dir
self.checkpoint_dir = os.path.join(model_dir, "checkpoints")
self.project_root = model_dir
self.model_path = os.path.join(self.checkpoint_dir, model_name or os.path.basename(model_dir.rstrip("/\\")))
config_path = model_name or os.path.basename(model_dir.rstrip("/\\"))
os.makedirs(self.checkpoint_dir, exist_ok=True)
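# Illustrative layout (hypothetical paths): ModelPath="/models", Model="acestep-v15-turbo"
# -> model_dir="/models/acestep-v15-turbo", checkpoints under "/models/acestep-v15-turbo/checkpoints"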
self.dit_handler = AceStepHandler()
# Patch handler so it uses our model dir instead of site-packages/checkpoints
self.dit_handler._get_project_root = lambda: self.project_root
device = self.options.get("device", "auto")
use_flash = self.options.get("use_flash_attention", True)
if isinstance(use_flash, str):
use_flash = str(use_flash).lower() in ("1", "true", "yes")
offload = self.options.get("offload_to_cpu", False)
if isinstance(offload, str):
offload = str(offload).lower() in ("1", "true", "yes")
status_msg, ok = self.dit_handler.initialize_service(
project_root=self.project_root,
config_path=config_path,
device=device,
use_flash_attention=use_flash,
compile_model=False,
offload_to_cpu=offload,
offload_dit_to_cpu=bool(self.options.get("offload_dit_to_cpu", False)),
)
if not ok:
return backend_pb2.Result(success=False, message=f"DiT init failed: {status_msg}")
self.llm_handler = None
if self.options.get("init_lm", True):
lm_model = self.options.get("lm_model_path", "acestep-5Hz-lm-0.6B")
# Ensure LM model is downloaded before initializing
try:
from pathlib import Path
lm_success, lm_msg = ensure_lm_model(
model_name=lm_model,
checkpoints_dir=Path(self.checkpoint_dir),
prefer_source=None, # Auto-detect HuggingFace vs ModelScope
)
if not lm_success:
print(f"[ace-step] Warning: LM model download failed: {lm_msg}", file=sys.stderr)
# Continue anyway - LLM initialization will fail gracefully
else:
print(f"[ace-step] LM model ready: {lm_msg}", file=sys.stderr)
except Exception as e:
print(f"[ace-step] Warning: LM model download check failed: {e}", file=sys.stderr)
# Continue anyway - LLM initialization will fail gracefully
self.llm_handler = LLMHandler()
lm_backend = (self.options.get("lm_backend") or "vllm").strip().lower()
if lm_backend not in ("vllm", "pt"):
lm_backend = "vllm"
lm_status, lm_ok = self.llm_handler.initialize(
checkpoint_dir=self.checkpoint_dir,
lm_model_path=lm_model,
backend=lm_backend,
device=device,
offload_to_cpu=offload,
dtype=getattr(self.dit_handler, "dtype", None),
)
if not lm_ok:
self.llm_handler = None
print(f"[ace-step] LM init failed (optional): {lm_status}", file=sys.stderr)
print(f"[ace-step] LoadModel: model={self.model_path}, options={list(self.options.keys())}", file=sys.stderr)
return backend_pb2.Result(success=True, message="Model loaded successfully")
except Exception as err:
return backend_pb2.Result(success=False, message=f"LoadModel error: {err}")
def SoundGeneration(self, request, context):
if not request.dst:
return backend_pb2.Result(success=False, message="request.dst is required")
use_simple = bool(request.text)
if use_simple:
payload = {
"sample_query": request.text or "",
"sample_mode": True,
"thinking": True,
"vocal_language": request.language or request.GetLanguage() or "en",
"instrumental": request.instrumental if request.HasField("instrumental") else False,
}
else:
caption = request.caption or request.GetCaption() or request.text
payload = {
"prompt": caption,
"lyrics": request.lyrics or request.lyrics or "",
"thinking": request.think if request.HasField("think") else False,
"vocal_language": request.language or request.GetLanguage() or "en",
}
if request.HasField("bpm"):
payload["bpm"] = request.bpm
if request.HasField("keyscale") and request.keyscale:
payload["key_scale"] = request.keyscale
if request.HasField("timesignature") and request.timesignature:
payload["time_signature"] = request.timesignature
if request.HasField("duration") and request.duration:
payload["audio_duration"] = int(request.duration) if request.duration else None
if request.src:
payload["src_audio_path"] = request.src
_generate_audio_sync(self, payload, request.dst)
return backend_pb2.Result(success=True, message="Sound generated successfully")
def TTS(self, request, context):
if not request.dst:
return backend_pb2.Result(success=False, message="request.dst is required")
payload = {
"sample_query": request.text,
"sample_mode": True,
"thinking": False,
"vocal_language": (request.language if request.language else "") or "en",
"instrumental": False,
}
_generate_audio_sync(self, payload, request.dst)
return backend_pb2.Result(success=True, message="TTS (music fallback) generated successfully")
def serve(address):
server = grpc.server(
futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
("grpc.max_message_length", 50 * 1024 * 1024),
("grpc.max_send_message_length", 50 * 1024 * 1024),
("grpc.max_receive_message_length", 50 * 1024 * 1024),
],
)
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print(f"[ace-step] Server listening on {address}", file=sys.stderr)
def shutdown(sig, frame):
server.stop(0)
sys.exit(0)
signal.signal(signal.SIGINT, shutdown)
signal.signal(signal.SIGTERM, shutdown)
try:
while True:
import time
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--addr", default="localhost:50051", help="Listen address")
args = parser.parse_args()
serve(args.addr)

View File

@@ -1,26 +0,0 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
PYTHON_VERSION="3.11"
PYTHON_PATCH="14"
PY_STANDALONE_TAG="20260203"
installRequirements
if [ ! -d ACE-Step-1.5 ]; then
git clone https://github.com/ace-step/ACE-Step-1.5
cd ACE-Step-1.5/
if [ "x${USE_PIP}" == "xtrue" ]; then
pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps .
else
uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps .
fi
fi
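# Illustrative invocation (hypothetical flags): USE_PIP=true bash install.sh
# switches the ACE-Step-1.5 install from "uv pip install" to plain "pip install"; both paths use --no-deps.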

View File

@@ -1,22 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cpu
torch
torchaudio
torchvision
# Core dependencies
transformers>=4.51.0,<4.58.0
diffusers
gradio
matplotlib>=3.7.5
scipy>=1.10.1
soundfile>=0.13.1
loguru>=0.7.3
einops>=0.8.1
accelerate>=1.12.0
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
numba>=0.63.1
vector-quantize-pytorch>=1.27.15
torchcodec>=0.9.1
torchao
modelscope

View File

@@ -1,22 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu128
torch
torchaudio
torchvision
# Core dependencies
transformers>=4.51.0,<4.58.0
diffusers
gradio>=6.5.1
matplotlib>=3.7.5
scipy>=1.10.1
soundfile>=0.13.1
loguru>=0.7.3
einops>=0.8.1
accelerate>=1.12.0
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
numba>=0.63.1
vector-quantize-pytorch>=1.27.15
torchcodec>=0.9.1
torchao
modelscope

View File

@@ -1,22 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch
torchaudio
torchvision
# Core dependencies
transformers>=4.51.0,<4.58.0
diffusers
gradio>=6.5.1
matplotlib>=3.7.5
scipy>=1.10.1
soundfile>=0.13.1
loguru>=0.7.3
einops>=0.8.1
accelerate>=1.12.0
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
numba>=0.63.1
vector-quantize-pytorch>=1.27.15
torchcodec>=0.9.1
torchao
modelscope

View File

@@ -1,22 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4
torch==2.8.0+rocm6.4
torchaudio
torchvision
# Core dependencies
transformers>=4.51.0,<4.58.0
diffusers
gradio>=6.5.1
matplotlib>=3.7.5
scipy>=1.10.1
soundfile>=0.13.1
loguru>=0.7.3
einops>=0.8.1
accelerate>=1.12.0
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
numba>=0.63.1
vector-quantize-pytorch>=1.27.15
torchcodec>=0.9.1
torchao
modelscope

View File

@@ -1,26 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/xpu
torch
torchaudio
torchvision
# Core dependencies
transformers>=4.51.0,<4.58.0
diffusers
gradio
matplotlib>=3.7.5
scipy>=1.10.1
soundfile>=0.13.1
loguru>=0.7.3
einops>=0.8.1
accelerate>=1.12.0
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
numba>=0.63.1
vector-quantize-pytorch>=1.27.15
torchcodec>=0.9.1
torchao
modelscope
# LoRA Training dependencies (optional)
peft>=0.7.0
lightning>=2.0.0

View File

@@ -1,21 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch
torchaudio
torchvision
# Core dependencies
transformers>=4.51.0,<4.58.0
diffusers
gradio>=6.5.1
matplotlib>=3.7.5
scipy>=1.10.1
soundfile>=0.13.1
loguru>=0.7.3
einops>=0.8.1
accelerate>=1.12.0
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
numba>=0.63.1
vector-quantize-pytorch>=1.27.15
torchcodec>=0.9.1
torchao
modelscope

View File

@@ -1,25 +0,0 @@
torch
torchaudio
torchvision
# Core dependencies
transformers>=4.51.0,<4.58.0
diffusers
gradio
matplotlib>=3.7.5
scipy>=1.10.1
soundfile>=0.13.1
loguru>=0.7.3
einops>=0.8.1
accelerate>=1.12.0
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
numba>=0.63.1
vector-quantize-pytorch>=1.27.15
torchcodec>=0.9.1
torchao
modelscope
# LoRA Training dependencies (optional)
peft>=0.7.0
lightning>=2.0.0

View File

@@ -1,4 +0,0 @@
setuptools
grpcio==1.76.0
protobuf
certifi

View File

@@ -1,9 +0,0 @@
#!/bin/bash
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
startBackend $@

View File

@@ -1,53 +0,0 @@
"""
Tests for the ACE-Step gRPC backend.
"""
import os
import tempfile
import unittest
import backend_pb2
import backend_pb2_grpc
import grpc
class TestACEStepBackend(unittest.TestCase):
"""Test Health, LoadModel, and SoundGeneration (minimal; no real model required)."""
@classmethod
def setUpClass(cls):
port = os.environ.get("BACKEND_PORT", "50051")
cls.channel = grpc.insecure_channel(f"localhost:{port}")
cls.stub = backend_pb2_grpc.BackendStub(cls.channel)
@classmethod
def tearDownClass(cls):
cls.channel.close()
def test_health(self):
response = self.stub.Health(backend_pb2.HealthMessage())
self.assertEqual(response.message, b"OK")
def test_load_model(self):
response = self.stub.LoadModel(backend_pb2.ModelOptions(Model="ace-step-test"))
self.assertTrue(response.success, response.message)
def test_sound_generation_minimal(self):
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
dst = f.name
try:
req = backend_pb2.SoundGenerationRequest(
text="upbeat pop song",
model="ace-step-test",
dst=dst,
)
response = self.stub.SoundGeneration(req)
self.assertTrue(response.success, response.message)
self.assertTrue(os.path.exists(dst), f"Output file not created: {dst}")
self.assertGreater(os.path.getsize(dst), 0)
finally:
if os.path.exists(dst):
os.unlink(dst)
if __name__ == "__main__":
unittest.main()

View File

@@ -1,19 +0,0 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
# Start backend in background (use env to avoid port conflict in parallel tests)
export PYTHONUNBUFFERED=1
BACKEND_PORT=${BACKEND_PORT:-50051}
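# Illustrative override (hypothetical port): BACKEND_PORT=50123 bash test.sh avoids clashing with another backend on 50051.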
python backend.py --addr "localhost:${BACKEND_PORT}" &
BACKEND_PID=$!
trap "kill $BACKEND_PID 2>/dev/null || true" EXIT
sleep 3
export BACKEND_PORT
runUnittests

View File

@@ -1,7 +0,0 @@
torch
torchaudio
accelerate
numpy>=1.24.0,<1.26.0
transformers
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster

View File

@@ -1,4 +0,0 @@
torch==2.7.1
transformers==4.48.3
accelerate
coqui-tts

View File

@@ -1,8 +0,0 @@
torch==2.7.1
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -1,5 +0,0 @@
grpcio==1.71.0
protobuf
certifi
packaging==24.1
https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl

View File

@@ -1,5 +0,0 @@
torch==2.7.1
transformers
accelerate
kokoro
soundfile

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cpu]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cuda12]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cuda13]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cuda12]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-audio
mlx[cuda13]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cpu]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cuda12]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cuda13]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cuda12]

View File

@@ -1,2 +0,0 @@
git+https://github.com/Blaizzy/mlx-vlm
mlx[cuda13]

View File

@@ -1,2 +0,0 @@
mlx-lm
mlx[cpu]

View File

@@ -1,2 +0,0 @@
mlx-lm
mlx[cuda12]

View File

@@ -1,2 +0,0 @@
mlx-lm
mlx[cuda13]

View File

@@ -1,2 +0,0 @@
mlx-lm
mlx[cuda12]

View File

@@ -1,2 +0,0 @@
mlx-lm
mlx[cuda13]

View File

@@ -1,4 +0,0 @@
grpcio==1.71.0
protobuf
grpcio-tools
useful-moonshine-onnx@git+https://git@github.com/moonshine-ai/moonshine.git#subdirectory=moonshine-onnx

View File

@@ -1,23 +0,0 @@
.PHONY: outetts
outetts:
bash install.sh
.PHONY: run
run: outetts
@echo "Running outetts..."
bash run.sh
@echo "outetts run."
.PHONY: test
test: outetts
@echo "Testing outetts..."
bash test.sh
@echo "outetts tested."
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -1,138 +0,0 @@
#!/usr/bin/env python3
"""
gRPC server for OuteTTS (OuteAI TTS) models.
"""
from concurrent import futures
import argparse
import signal
import sys
import os
import asyncio
import backend_pb2
import backend_pb2_grpc
import grpc
import outetts
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
model_name = request.Model
if os.path.exists(request.ModelFile):
model_name = request.ModelFile
self.options = {}
for opt in request.Options:
if ":" not in opt:
continue
key, value = opt.split(":", 1)
try:
if "." in value:
value = float(value)
else:
value = int(value)
except ValueError:
pass
self.options[key] = value
MODELNAME = "OuteAI/OuteTTS-0.3-1B"
TOKENIZER = "OuteAI/OuteTTS-0.3-1B"
VERSION = "0.3"
SPEAKER = "en_male_1"
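# Illustrative model option (hypothetical value): Options=["speaker:en_female_1"] selects a different built-in speaker.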
for opt in request.Options:
if opt.startswith("tokenizer:"):
TOKENIZER = opt.split(":")[1]
break
if opt.startswith("version:"):
VERSION = opt.split(":")[1]
break
if opt.startswith("speaker:"):
SPEAKER = opt.split(":")[1]
break
if model_name != "":
MODELNAME = model_name
try:
model_config = outetts.HFModelConfig_v2(
model_path=MODELNAME,
tokenizer_path=TOKENIZER
)
self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config)
self.interface.print_default_speakers()
if request.AudioPath:
if os.path.isabs(request.AudioPath):
self.AudioPath = request.AudioPath
else:
self.AudioPath = os.path.join(request.ModelPath, request.AudioPath)
self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
else:
self.speaker = self.interface.load_default_speaker(name=SPEAKER)
if request.ContextSize > 0:
self.max_tokens = request.ContextSize
else:
self.max_tokens = self.options.get("max_new_tokens", 512)
except Exception as err:
print("Error:", err, file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)
def TTS(self, request, context):
try:
text = request.text if request.text else "Speech synthesis is the artificial production of human speech."
print("[OuteTTS] generating TTS", file=sys.stderr)
gen_cfg = outetts.GenerationConfig(
text=text,
temperature=self.options.get("temperature", 0.1),
repetition_penalty=self.options.get("repetition_penalty", 1.1),
max_length=self.max_tokens,
speaker=self.speaker,
)
output = self.interface.generate(config=gen_cfg)
print("[OuteTTS] Generated TTS", file=sys.stderr)
output.save(request.dst)
print("[OuteTTS] TTS done", file=sys.stderr)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(success=True)
async def serve(address):
server = grpc.aio.server(
migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024),
('grpc.max_send_message_length', 50 * 1024 * 1024),
('grpc.max_receive_message_length', 50 * 1024 * 1024),
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
loop = asyncio.get_event_loop()
for sig in (signal.SIGINT, signal.SIGTERM):
loop.add_signal_handler(
sig, lambda: asyncio.ensure_future(server.stop(5))
)
await server.start()
print("Server started. Listening on: " + address, file=sys.stderr)
await server.wait_for_termination()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the OuteTTS gRPC server.")
parser.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
args = parser.parse_args()
asyncio.run(serve(args.addr))

View File

@@ -1,11 +0,0 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
installRequirements

View File

@@ -1,7 +0,0 @@
torch==2.7.1
llvmlite==0.43.0
numba==0.60.0
accelerate
bitsandbytes
outetts
protobuf==6.33.5

View File

@@ -1,7 +0,0 @@
torch==2.7.1
accelerate
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
protobuf==6.33.5
outetts

View File

@@ -1,7 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu130
torch==2.9.0
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
outetts
protobuf==6.33.5

View File

@@ -1,8 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.4
torch==2.8.0+rocm6.4
accelerate
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
outetts
protobuf==6.33.5

View File

@@ -1,8 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/xpu
torch
optimum[openvino]
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
outetts
protobuf==6.33.5

View File

@@ -1,6 +0,0 @@
grpcio==1.76.0
protobuf==6.33.5
certifi
setuptools
scipy==1.15.1
numpy>=2.0.0

View File

@@ -1,9 +0,0 @@
#!/bin/bash
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
startBackend $@

View File

@@ -1,35 +0,0 @@
"""
Test script for the OuteTTS gRPC service.
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc
import grpc
class TestBackendServicer(unittest.TestCase):
def setUp(self):
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
time.sleep(5)
def tearDown(self):
self.service.terminate()
self.service.wait()
def test_health(self):
try:
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.Health(backend_pb2.HealthMessage())
self.assertEqual(response.message, b'OK')
except Exception as err:
self.fail(f"Health check failed: {err}")
finally:
self.tearDown()
if __name__ == "__main__":
unittest.main()

View File

@@ -1,11 +0,0 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
runUnittests

View File

@@ -1,4 +1,4 @@
torch
torchaudio
torch==2.7.1
torchaudio==0.22.1
qwen-tts
sox
sox

View File

@@ -3,5 +3,4 @@ protobuf
certifi
packaging==24.1
soundfile
setuptools
six
setuptools

View File

@@ -28,7 +28,7 @@ class TestBackendServicer(unittest.TestCase):
stderr=subprocess.PIPE,
text=True
)
time.sleep(30)
time.sleep(5)
def tearDown(self) -> None:
"""

View File

@@ -1,4 +0,0 @@
torch==2.7.1
transformers
accelerate
rerankers[transformers]

View File

@@ -1,7 +0,0 @@
torch==2.7.1
rfdetr
opencv-python
accelerate
peft
inference
optimum-quanto

View File

@@ -24,6 +24,7 @@ XPU=os.environ.get("XPU", "0") == "1"
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
from transformers import AutoProcessor, MusicgenForConditionalGeneration, DiaForConditionalGeneration
from scipy.io import wavfile
import outetts
from sentence_transformers import SentenceTransformer
@@ -88,6 +89,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.CUDA = torch.cuda.is_available()
self.OV=False
self.OuteTTS=False
self.DiaTTS=False
self.SentenceTransformer = False
@@ -237,6 +239,45 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.processor = self.processor.to("cuda")
print("DiaForConditionalGeneration loaded", file=sys.stderr)
self.DiaTTS = True
elif request.Type == "OuteTTS":
autoTokenizer = False
options = request.Options
MODELNAME = "OuteAI/OuteTTS-0.3-1B"
TOKENIZER = "OuteAI/OuteTTS-0.3-1B"
VERSION = "0.3"
SPEAKER = "en_male_1"
for opt in options:
if opt.startswith("tokenizer:"):
TOKENIZER = opt.split(":")[1]
break
if opt.startswith("version:"):
VERSION = opt.split(":")[1]
break
if opt.startswith("speaker:"):
SPEAKER = opt.split(":")[1]
break
if model_name != "":
MODELNAME = model_name
# Configure the model
model_config = outetts.HFModelConfig_v2(
model_path=MODELNAME,
tokenizer_path=TOKENIZER
)
# Initialize the interface
self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config)
self.OuteTTS = True
self.interface.print_default_speakers()
if request.AudioPath:
if os.path.isabs(request.AudioPath):
self.AudioPath = request.AudioPath
else:
self.AudioPath = os.path.join(request.ModelPath, request.AudioPath)
self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
else:
self.speaker = self.interface.load_default_speaker(name=SPEAKER)
elif request.Type == "SentenceTransformer":
autoTokenizer = False
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
@@ -547,8 +588,30 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Result(success=True)
def CallOuteTTS(self, request, context):
try:
print("[OuteTTS] generating TTS", file=sys.stderr)
gen_cfg = outetts.GenerationConfig(
text="Speech synthesis is the artificial production of human speech.",
temperature=self.options.get("temperature", 0.1),
repetition_penalty=self.options.get("repetition_penalty", 1.1),
max_length=self.max_tokens,
speaker=self.speaker,
# voice_characteristics="upbeat enthusiasm, friendliness, clarity, professionalism, and trustworthiness"
)
output = self.interface.generate(config=gen_cfg)
print("[OuteTTS] Generated TTS", file=sys.stderr)
output.save(request.dst)
print("[OuteTTS] TTS done", file=sys.stderr)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(success=True)
# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
def TTS(self, request, context):
if self.OuteTTS:
return self.CallOuteTTS(request, context)
if self.DiaTTS:
print("DiaTTS", file=sys.stderr)
return self.CallDiaTTS(request, context)

View File

@@ -4,5 +4,6 @@ numba==0.60.0
accelerate
transformers
bitsandbytes
sentence-transformers==5.2.2
protobuf==6.33.5
outetts
sentence-transformers==5.2.0
protobuf==6.33.4

View File

@@ -4,5 +4,6 @@ llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
sentence-transformers==5.2.2
protobuf==6.33.5
outetts
sentence-transformers==5.2.0
protobuf==6.33.4

View File

@@ -4,5 +4,6 @@ llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
sentence-transformers==5.2.2
protobuf==6.33.5
outetts
sentence-transformers==5.2.0
protobuf==6.33.4

View File

@@ -5,5 +5,7 @@ transformers
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
sentence-transformers==5.2.2
protobuf==6.33.5
outetts
bitsandbytes
sentence-transformers==5.2.0
protobuf==6.33.4

View File

@@ -5,5 +5,6 @@ llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
sentence-transformers==5.2.2
protobuf==6.33.5
outetts
sentence-transformers==5.2.0
protobuf==6.33.4

View File

@@ -1,8 +0,0 @@
torch==2.7.1
llvmlite==0.43.0
numba==0.60.0
accelerate
transformers
bitsandbytes
sentence-transformers==5.2.2
protobuf==6.33.5

View File

@@ -1,5 +1,5 @@
grpcio==1.76.0
protobuf==6.33.5
protobuf==6.33.4
certifi
setuptools
scipy==1.15.1

View File

@@ -21,9 +21,9 @@ if [ -n "$VOXCPM_PATH" ] && [ -f "$VOXCPM_PATH/modules/minicpm4/model.py" ]; the
echo "Applying patch to voxcpm at $VOXCPM_PATH/modules/minicpm4/model.py"
# Replace .contiguous() with .unsqueeze(0) for the three lines in the attention forward_step method
# This fixes the dimension error in scaled_dot_product_attention
# Use temp file for in-place edit so it works on both BSD sed (macOS) and GNU sed (Linux)
PATCH_FILE="$VOXCPM_PATH/modules/minicpm4/model.py"
sed 's/query_states = query_states\.contiguous()/query_states = query_states.unsqueeze(0)/g; s/key_cache = key_cache\.contiguous()/key_cache = key_cache.unsqueeze(0)/g; s/value_cache = value_cache\.contiguous()/value_cache = value_cache.unsqueeze(0)/g' "$PATCH_FILE" > "${PATCH_FILE}.tmp" && mv "${PATCH_FILE}.tmp" "$PATCH_FILE"
sed -i 's/query_states = query_states\.contiguous()/query_states = query_states.unsqueeze(0)/g' "$VOXCPM_PATH/modules/minicpm4/model.py"
sed -i 's/key_cache = key_cache\.contiguous()/key_cache = key_cache.unsqueeze(0)/g' "$VOXCPM_PATH/modules/minicpm4/model.py"
sed -i 's/value_cache = value_cache\.contiguous()/value_cache = value_cache.unsqueeze(0)/g' "$VOXCPM_PATH/modules/minicpm4/model.py"
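# Note: "sed -i" with no suffix argument is a GNU sed extension; BSD/macOS sed expects a backup suffix (e.g. sed -i ''),
# which is why the removed variant above wrote to a temp file and moved it back into place.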
echo "Patch applied successfully"
else
echo "Warning: Could not find voxcpm installation to apply patch (path: ${VOXCPM_PATH:-not found})"

View File

@@ -8,8 +8,6 @@ else
source $backend_dir/../common/libbackend.sh
fi
if [ "x${BUILD_PROFILE}" != "xmetal" ] && [ "x${BUILD_PROFILE}" != "xmps" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
fi
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy unsafe-best-match"
installRequirements

View File

@@ -1,2 +0,0 @@
torch
whisperx @ git+https://github.com/m-bain/whisperX.git

View File

@@ -19,14 +19,6 @@ func SoundGeneration(
doSample *bool,
sourceFile *string,
sourceDivisor *int32,
think *bool,
caption string,
lyrics string,
bpm *int32,
keyscale string,
language string,
timesignature string,
instrumental *bool,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
modelConfig config.ModelConfig,
@@ -53,11 +45,8 @@ func SoundGeneration(
fileName := utils.GenerateUniqueFileName(audioDir, "sound_generation", ".wav")
filePath := filepath.Join(audioDir, fileName)
if filePath, err = filepath.Abs(filePath); err != nil {
return "", nil, fmt.Errorf("failed resolving sound generation path: %w", err)
}
req := &proto.SoundGenerationRequest{
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
Text: text,
Model: modelConfig.Model,
Dst: filePath,
@@ -66,38 +55,12 @@ func SoundGeneration(
Temperature: temperature,
Src: sourceFile,
SrcDivisor: sourceDivisor,
}
if think != nil {
req.Think = think
}
if caption != "" {
req.Caption = &caption
}
if lyrics != "" {
req.Lyrics = &lyrics
}
if bpm != nil {
req.Bpm = bpm
}
if keyscale != "" {
req.Keyscale = &keyscale
}
if language != "" {
req.Language = &language
}
if timesignature != "" {
req.Timesignature = &timesignature
}
if instrumental != nil {
req.Instrumental = instrumental
}
})
res, err := soundGenModel.SoundGeneration(context.Background(), req)
if err != nil {
return "", nil, err
}
if res != nil && !res.Success {
// return RPC error if any
if !res.Success {
return "", nil, fmt.Errorf("error during sound generation: %s", res.Message)
}
return filePath, res, nil
return filePath, res, err
}

View File

@@ -100,9 +100,7 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
filePath, _, err := backend.SoundGeneration(text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor),
nil, "", "", nil, "", "", "", nil,
ml, opts, options)
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
if err != nil {
return err

View File

@@ -671,8 +671,7 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool {
}
if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
soundGenBackends := []string{"transformers-musicgen", "ace-step", "mock-backend"}
if !slices.Contains(soundGenBackends, c.Backend) {
if c.Backend != "transformers-musicgen" {
return false
}
}

View File

@@ -843,7 +843,7 @@ parameters:
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/wav"), Equal("audio/vnd.wave")))
Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave")))
})
It("installs and is capable to generate images", Label("stablediffusion"), func() {
if runtime.GOOS != "linux" {

View File

@@ -8,7 +8,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/audio"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
)
@@ -33,30 +32,12 @@ func SoundGenerationEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader
xlog.Debug("Sound Generation Request about to be sent to backend", "modelFile", "modelFile", "backend", cfg.Backend)
language := input.Language
if language == "" {
language = input.VocalLanguage
}
var bpm *int32
if input.BPM != nil {
b := int32(*input.BPM)
bpm = &b
}
filePath, _, err := backend.SoundGeneration(
input.Text, input.Duration, input.Temperature, input.DoSample,
nil, nil,
input.Think, input.Caption, input.Lyrics, bpm, input.Keyscale,
language, input.Timesignature,
input.Instrumental,
ml, appConfig, *cfg)
// TODO: Support uploading files?
filePath, _, err := backend.SoundGeneration(input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
if err != nil {
return err
}
filePath, contentType := audio.NormalizeAudioFile(filePath)
if contentType != "" {
c.Response().Header().Set("Content-Type", contentType)
}
return c.Attachment(filePath, filepath.Base(filePath))
}
}

View File

@@ -8,7 +8,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/audio"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/xlog"
)
@@ -40,10 +39,6 @@ func TTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig
if err != nil {
return err
}
filePath, contentType := audio.NormalizeAudioFile(filePath)
if contentType != "" {
c.Response().Header().Set("Content-Type", contentType)
}
return c.Attachment(filePath, filepath.Base(filePath))
}
}

View File

@@ -7,11 +7,12 @@ import (
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/audio"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/xlog"
"github.com/mudler/LocalAI/pkg/utils"
)
// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
@@ -85,10 +86,6 @@ func TTSEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig
return err
}
filePath, contentType := audio.NormalizeAudioFile(filePath)
if contentType != "" {
c.Response().Header().Set("Content-Type", contentType)
}
return c.Attachment(filePath, filepath.Base(filePath))
}
}

View File

@@ -28,7 +28,7 @@ type wrappedModel struct {
TTSConfig *config.ModelConfig
TranscriptionConfig *config.ModelConfig
LLMConfig *config.ModelConfig
VADConfig *config.ModelConfig
VADConfig *config.ModelConfig
appConfig *config.ApplicationConfig
modelLoader *model.ModelLoader
@@ -114,35 +114,6 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im
})
}
}
// Add noAction function before templating so it's included in the prompt
// Allow the user to set custom actions via config file
noActionName := "answer"
noActionDescription := "use this action to answer without performing any action"
if m.LLMConfig.FunctionsConfig.NoActionFunctionName != "" {
noActionName = m.LLMConfig.FunctionsConfig.NoActionFunctionName
}
if m.LLMConfig.FunctionsConfig.NoActionDescriptionName != "" {
noActionDescription = m.LLMConfig.FunctionsConfig.NoActionDescriptionName
}
noActionGrammar := functions.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
"properties": map[string]interface{}{
"message": map[string]interface{}{
"type": "string",
"description": "The message to reply the user with",
},
},
},
}
if !m.LLMConfig.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
}
predInput = m.evaluator.TemplateMessages(input, input.Messages, m.LLMConfig, funcs, len(funcs) > 0)
@@ -153,29 +124,38 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im
}
}
// Handle tool_choice parameter similar to the chat endpoint
if toolChoice != nil {
if toolChoice.Mode != "" {
// String values: "auto", "required", "none"
switch toolChoice.Mode {
case types.ToolChoiceModeRequired:
m.LLMConfig.SetFunctionCallString("required")
case types.ToolChoiceModeNone:
// Don't use tools
m.LLMConfig.SetFunctionCallString("none")
case types.ToolChoiceModeAuto:
// Default behavior - let model decide
}
} else if toolChoice.Function != nil {
// Specific function specified
m.LLMConfig.SetFunctionCallString(toolChoice.Function.Name)
}
}
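// Illustrative request fragment (hypothetical tool name): tool_choice = {"type":"function","function":{"name":"get_weather"}}
// lands in the toolChoice.Function branch above and pins generation to that single function.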
// Generate grammar for function calling if tools are provided and grammar generation is enabled
shouldUseFn := len(tools) > 0 && m.LLMConfig.ShouldUseFunctions()
if !m.LLMConfig.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn {
// Allow the user to set custom actions via config file
noActionName := "answer"
noActionDescription := "use this action to answer without performing any action"
if m.LLMConfig.FunctionsConfig.NoActionFunctionName != "" {
noActionName = m.LLMConfig.FunctionsConfig.NoActionFunctionName
}
if m.LLMConfig.FunctionsConfig.NoActionDescriptionName != "" {
noActionDescription = m.LLMConfig.FunctionsConfig.NoActionDescriptionName
}
noActionGrammar := functions.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
"properties": map[string]interface{}{
"message": map[string]interface{}{
"type": "string",
"description": "The message to reply the user with",
},
},
},
}
if !m.LLMConfig.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
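// Illustrative effect (sketch, not output from this diff): with the no-action function registered, the grammar
// also admits a reply such as {"name":"answer","arguments":{"message":"..."}} instead of a real tool call.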
// Force picking one of the functions by the request
if m.LLMConfig.FunctionToCall() != "" {
funcs = functions.Functions(funcs).Select(m.LLMConfig.FunctionToCall())
@@ -204,7 +184,7 @@ func (m *wrappedModel) Predict(ctx context.Context, messages schema.Messages, im
toolChoiceJSON = string(b)
}
return backend.ModelInference(ctx, predInput, messages, images, videos, audios, m.modelLoader, m.LLMConfig, m.confLoader, m.appConfig, tokenCallback, toolsJSON, toolChoiceJSON, logprobs, topLogprobs, logitBias)
return backend.ModelInference(ctx, predInput, messages, images, videos, audios, m.modelLoader, m.LLMConfig, m.confLoader, m.appConfig, tokenCallback, toolsJSON, toolChoiceJSON, logprobs, topLogprobs, logitBias, )
}
func (m *wrappedModel) TTS(ctx context.Context, text, voice, language string) (string, *proto.Result, error) {
@@ -238,11 +218,11 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
return &transcriptOnlyModel{
TranscriptionConfig: cfgSST,
VADConfig: cfgVAD,
VADConfig: cfgVAD,
confLoader: cl,
confLoader: cl,
modelLoader: ml,
appConfig: appConfig,
appConfig: appConfig,
}, cfgSST, nil
}
@@ -317,11 +297,11 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
TTSConfig: cfgTTS,
TranscriptionConfig: cfgSST,
LLMConfig: cfgLLM,
VADConfig: cfgVAD,
VADConfig: cfgVAD,
confLoader: cl,
confLoader: cl,
modelLoader: ml,
appConfig: appConfig,
evaluator: evaluator,
appConfig: appConfig,
evaluator: evaluator,
}, nil
}

Some files were not shown because too many files have changed in this diff.