Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
9c40d9bbed feat(diffusers): add builds for nvidia-l4t
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-08-08 22:50:40 +02:00
275 changed files with 2712 additions and 13819 deletions

View File

@@ -6,10 +6,6 @@ models
backends
examples/chatbot-ui/models
backend/go/image/stablediffusion-ggml/build/
backend/go/*/build
backend/go/*/.cache
backend/go/*/sources
backend/go/*/package
examples/rwkv/models
examples/**/models
Dockerfile*

View File

@@ -89,8 +89,8 @@ jobs:
context: "./backend"
- build-type: 'l4t'
cuda-major-version: "12"
cuda-minor-version: "8"
platforms: 'linux/arm64'
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-l4t-diffusers'
runs-on: 'ubuntu-24.04-arm'
@@ -99,30 +99,6 @@ jobs:
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'true'
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'true'
backend: "chatterbox"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# CUDA 11 additional backends
- build-type: 'cublas'
cuda-major-version: "11"
@@ -187,7 +163,7 @@ jobs:
# CUDA 12 builds
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
@@ -199,7 +175,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
@@ -211,11 +187,11 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "vllm"
@@ -223,7 +199,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
@@ -235,20 +211,20 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "diffusers"
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# CUDA 12 additional backends
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
@@ -260,7 +236,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
@@ -272,7 +248,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
@@ -284,7 +260,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-bark'
@@ -296,7 +272,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
@@ -314,7 +290,7 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-rerankers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "rerankers"
dockerfile: "./backend/Dockerfile.python"
@@ -326,7 +302,7 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "llama-cpp"
dockerfile: "./backend/Dockerfile.llama-cpp"
@@ -337,8 +313,8 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-vllm'
runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "vllm"
dockerfile: "./backend/Dockerfile.python"
@@ -350,7 +326,7 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-transformers'
runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "transformers"
dockerfile: "./backend/Dockerfile.python"
@@ -362,7 +338,7 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-diffusers'
runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "diffusers"
dockerfile: "./backend/Dockerfile.python"
@@ -375,7 +351,7 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-kokoro'
runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "kokoro"
dockerfile: "./backend/Dockerfile.python"
@@ -387,7 +363,7 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
@@ -399,7 +375,7 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-coqui'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "coqui"
dockerfile: "./backend/Dockerfile.python"
@@ -411,7 +387,7 @@ jobs:
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-bark'
runs-on: 'arc-runner-set'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
skip-drivers: 'false'
backend: "bark"
dockerfile: "./backend/Dockerfile.python"
@@ -459,7 +435,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-vllm'
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "vllm"
@@ -578,7 +554,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
@@ -615,7 +591,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
@@ -675,7 +651,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
@@ -700,7 +676,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
@@ -760,7 +736,7 @@ jobs:
context: "./"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
@@ -776,7 +752,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas-whisper'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
runs-on: 'ubuntu-latest'
skip-drivers: 'false'
backend: "whisper"
@@ -836,7 +812,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
@@ -872,7 +848,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
@@ -897,7 +873,7 @@ jobs:
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
@@ -938,7 +914,7 @@ jobs:
skip-drivers: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas-exllama2'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
runs-on: 'ubuntu-latest'
backend: "exllama2"
dockerfile: "./backend/Dockerfile.python"
@@ -950,7 +926,7 @@ jobs:
# platforms: 'linux/amd64'
# tag-latest: 'auto'
# tag-suffix: '-gpu-hipblas-rfdetr'
# base-image: "rocm/dev-ubuntu-22.04:6.4.3"
# base-image: "rocm/dev-ubuntu-22.04:6.1"
# runs-on: 'ubuntu-latest'
# skip-drivers: 'false'
# backend: "rfdetr"
@@ -969,47 +945,6 @@ jobs:
backend: "kitten-tts"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
backend-jobs-darwin:
uses: ./.github/workflows/backend_build_darwin.yml
strategy:
matrix:
include:
- backend: "diffusers"
tag-suffix: "-metal-darwin-arm64-diffusers"
build-type: "mps"
- backend: "mlx"
tag-suffix: "-metal-darwin-arm64-mlx"
build-type: "mps"
- backend: "chatterbox"
tag-suffix: "-metal-darwin-arm64-chatterbox"
build-type: "mps"
- backend: "mlx-vlm"
tag-suffix: "-metal-darwin-arm64-mlx-vlm"
build-type: "mps"
- backend: "mlx-audio"
tag-suffix: "-metal-darwin-arm64-mlx-audio"
build-type: "mps"
- backend: "stablediffusion-ggml"
tag-suffix: "-metal-darwin-arm64-stablediffusion-ggml"
build-type: "metal"
lang: "go"
- backend: "whisper"
tag-suffix: "-metal-darwin-arm64-whisper"
build-type: "metal"
lang: "go"
with:
backend: ${{ matrix.backend }}
build-type: ${{ matrix.build-type }}
go-version: "1.24.x"
tag-suffix: ${{ matrix.tag-suffix }}
lang: ${{ matrix.lang || 'python' }}
use-pip: ${{ matrix.backend == 'diffusers' }}
runs-on: "macOS-14"
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
llama-cpp-darwin:
runs-on: macOS-14
strategy:
@@ -1017,7 +952,7 @@ jobs:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
@@ -1034,19 +969,22 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
make backends/llama-cpp-darwin
make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
- name: Upload llama-cpp.tar
uses: actions/upload-artifact@v4
with:
name: llama-cpp-tar
path: backend-images/llama-cpp.tar
path: build/llama-cpp.tar
llama-cpp-darwin-publish:
needs: llama-cpp-darwin
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
- name: Download llama-cpp.tar
uses: actions/download-artifact@v5
uses: actions/download-artifact@v4
with:
name: llama-cpp-tar
path: .
@@ -1103,7 +1041,7 @@ jobs:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
@@ -1122,19 +1060,21 @@ jobs:
make protogen-go
make build
export PLATFORMARCH=darwin/amd64
make backends/llama-cpp-darwin
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
- name: Upload llama-cpp.tar
uses: actions/upload-artifact@v4
with:
name: llama-cpp-tar-x86
path: backend-images/llama-cpp.tar
path: build/llama-cpp.tar
llama-cpp-darwin-x86-publish:
if: github.event_name != 'pull_request'
needs: llama-cpp-darwin-x86
runs-on: ubuntu-latest
steps:
- name: Download llama-cpp.tar
uses: actions/download-artifact@v5
uses: actions/download-artifact@v4
with:
name: llama-cpp-tar-x86
path: .

View File

@@ -55,9 +55,9 @@ on:
type: string
secrets:
dockerUsername:
required: false
required: true
dockerPassword:
required: false
required: true
quayUsername:
required: true
quayPassword:
@@ -66,8 +66,6 @@ on:
jobs:
backend-build:
runs-on: ${{ inputs.runs-on }}
env:
quay_username: ${{ secrets.quayUsername }}
steps:
@@ -97,7 +95,7 @@ jobs:
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v4
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'
@@ -189,7 +187,7 @@ jobs:
password: ${{ secrets.dockerPassword }}
- name: Login to Quay.io
if: ${{ env.quay_username != '' }}
# if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: quay.io
@@ -232,7 +230,7 @@ jobs:
file: ${{ inputs.dockerfile }}
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: ${{ env.quay_username != '' }}
push: true
tags: ${{ steps.meta_pull_request.outputs.tags }}
labels: ${{ steps.meta_pull_request.outputs.labels }}
@@ -240,4 +238,4 @@ jobs:
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

View File

@@ -1,144 +0,0 @@
---
name: 'build darwin python backend container images (reusable)'
on:
workflow_call:
inputs:
backend:
description: 'Backend to build'
required: true
type: string
build-type:
description: 'Build type (e.g., mps)'
default: ''
type: string
use-pip:
description: 'Use pip to install dependencies'
default: false
type: boolean
lang:
description: 'Programming language (e.g. go)'
default: 'python'
type: string
go-version:
description: 'Go version to use'
default: '1.24.x'
type: string
tag-suffix:
description: 'Tag suffix for the built image'
required: true
type: string
runs-on:
description: 'Runner to use'
default: 'macOS-14'
type: string
secrets:
dockerUsername:
required: false
dockerPassword:
required: false
quayUsername:
required: true
quayPassword:
required: true
jobs:
darwin-backend-build:
runs-on: ${{ inputs.runs-on }}
strategy:
matrix:
go-version: ['${{ inputs.go-version }}']
steps:
- name: Clone
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
cache: false
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
- name: Build ${{ inputs.backend }}-darwin
run: |
make protogen-go
BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
- name: Upload ${{ inputs.backend }}.tar
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.backend }}-tar
path: backend-images/${{ inputs.backend }}.tar
darwin-backend-publish:
needs: darwin-backend-build
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
- name: Download ${{ inputs.backend }}.tar
uses: actions/download-artifact@v5
with:
name: ${{ inputs.backend }}-tar
path: .
- name: Install crane
run: |
curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
sudo mv crane /usr/local/bin/
- name: Log in to DockerHub
run: |
echo "${{ secrets.dockerPassword }}" | crane auth login docker.io -u "${{ secrets.dockerUsername }}" --password-stdin
- name: Log in to quay.io
run: |
echo "${{ secrets.quayPassword }}" | crane auth login quay.io -u "${{ secrets.quayUsername }}" --password-stdin
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
localai/localai-backends
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=auto
suffix=${{ inputs.tag-suffix }},onlatest=true
- name: Docker meta
id: quaymeta
uses: docker/metadata-action@v5
with:
images: |
quay.io/go-skynet/local-ai-backends
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=auto
suffix=${{ inputs.tag-suffix }},onlatest=true
- name: Push Docker image (DockerHub)
run: |
for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
crane push ${{ inputs.backend }}.tar $tag
done
- name: Push Docker image (Quay)
run: |
for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
crane push ${{ inputs.backend }}.tar $tag
done

View File

@@ -1,78 +0,0 @@
name: 'build backend container images (PR-filtered)'
on:
pull_request:
concurrency:
group: ci-backends-pr-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
has-backends: ${{ steps.set-matrix.outputs.has-backends }}
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
steps:
- name: Checkout repository
uses: actions/checkout@v5
- name: Setup Bun
uses: oven-sh/setup-bun@v2
- name: Install dependencies
run: |
bun add js-yaml
bun add @octokit/core
# filters the matrix in backend.yml
- name: Filter matrix for changed backends
id: set-matrix
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_EVENT_PATH: ${{ github.event_path }}
run: bun run scripts/changed-backends.js
backend-jobs:
needs: generate-matrix
uses: ./.github/workflows/backend_build.yml
if: needs.generate-matrix.outputs.has-backends == 'true'
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
backend: ${{ matrix.backend }}
dockerfile: ${{ matrix.dockerfile }}
skip-drivers: ${{ matrix.skip-drivers }}
context: ${{ matrix.context }}
secrets:
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
fail-fast: true
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
backend-jobs-darwin:
needs: generate-matrix
uses: ./.github/workflows/backend_build_darwin.yml
if: needs.generate-matrix.outputs.has-backends-darwin == 'true'
with:
backend: ${{ matrix.backend }}
build-type: ${{ matrix.build-type }}
go-version: "1.24.x"
tag-suffix: ${{ matrix.tag-suffix }}
lang: ${{ matrix.lang || 'python' }}
use-pip: ${{ matrix.backend == 'diffusers' }}
runs-on: "macOS-14"
secrets:
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
fail-fast: true
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-darwin) }}

View File

@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Go
@@ -21,47 +21,3 @@ jobs:
- name: Run GoReleaser
run: |
make dev-dist
launcher-build-darwin:
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: 1.23
- name: Build launcher for macOS ARM64
run: |
make build-launcher-darwin
ls -liah dist
- name: Upload macOS launcher artifacts
uses: actions/upload-artifact@v4
with:
name: launcher-macos
path: dist/
retention-days: 30
launcher-build-linux:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: 1.23
- name: Build launcher for Linux
run: |
sudo apt-get update
sudo apt-get install golang gcc libgl1-mesa-dev xorg-dev libxkbcommon-dev
make build-launcher-linux
- name: Upload Linux launcher artifacts
uses: actions/upload-artifact@v4
with:
name: launcher-linux
path: local-ai-launcher-linux.tar.xz
retention-days: 30

View File

@@ -31,7 +31,7 @@ jobs:
file: "backend/go/piper/Makefile"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
id: bump
run: |

View File

@@ -12,7 +12,7 @@ jobs:
- repository: "mudler/LocalAI"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}

View File

@@ -15,7 +15,7 @@ jobs:
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Install dependencies
run: |
sudo apt-get update

View File

@@ -20,7 +20,7 @@ jobs:
skip-commit-verification: true
- name: Checkout repository
uses: actions/checkout@v5
uses: actions/checkout@v4
- name: Approve a PR if not already approved
run: |

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5

View File

@@ -73,7 +73,7 @@ jobs:
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v4
- name: Cache GRPC
uses: docker/build-push-action@v6

View File

@@ -43,7 +43,7 @@ jobs:
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v4
- name: Cache Intel images
uses: docker/build-push-action@v6

View File

@@ -36,7 +36,7 @@ jobs:
include:
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-gpu-nvidia-cuda-12'
@@ -47,7 +47,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"

View File

@@ -39,7 +39,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas'
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
@@ -91,7 +91,7 @@ jobs:
aio: "-aio-gpu-nvidia-cuda-11"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12'
@@ -144,7 +144,7 @@ jobs:
include:
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "8"
cuda-minor-version: "0"
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'

View File

@@ -94,7 +94,7 @@ jobs:
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v4
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'

View File

@@ -9,4 +9,4 @@ jobs:
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v6
- uses: actions/labeler@v5

View File

@@ -13,7 +13,7 @@ jobs:
if: ${{ github.actor == 'localai-bot' }}
steps:
- name: Checkout repository
uses: actions/checkout@v5
uses: actions/checkout@v4
- name: Approve a PR if not already approved
run: |

View File

@@ -11,7 +11,7 @@ jobs:
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
@@ -90,7 +90,7 @@ jobs:
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI

View File

@@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Go
@@ -23,42 +23,4 @@ jobs:
version: v2.11.0
args: release --clean
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
launcher-build-darwin:
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: 1.23
- name: Build launcher for macOS ARM64
run: |
make build-launcher-darwin
- name: Upload DMG to Release
uses: softprops/action-gh-release@v2
with:
files: ./dist/LocalAI.dmg
launcher-build-linux:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: 1.23
- name: Build launcher for Linux
run: |
sudo apt-get update
sudo apt-get install golang gcc libgl1-mesa-dev xorg-dev libxkbcommon-dev
make build-launcher-linux
- name: Upload Linux launcher artifacts
uses: softprops/action-gh-release@v2
with:
files: ./local-ai-launcher-linux.tar.xz
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -14,11 +14,11 @@ jobs:
GO111MODULE: on
steps:
- name: Checkout Source
uses: actions/checkout@v5
uses: actions/checkout@v4
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.22.8
uses: securego/gosec@v2.22.7
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'

View File

@@ -10,7 +10,7 @@ jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v9
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
with:
stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'

View File

@@ -19,7 +19,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v5
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
@@ -40,7 +40,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
@@ -61,7 +61,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
@@ -83,7 +83,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
@@ -104,7 +104,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v5
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
@@ -124,7 +124,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v5
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
@@ -186,7 +186,7 @@ jobs:
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
# df -h
# - name: Clone
# uses: actions/checkout@v5
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
@@ -211,7 +211,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v5
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
@@ -232,7 +232,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies

View File

@@ -23,20 +23,6 @@ jobs:
matrix:
go-version: ['1.21.x']
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Release space from worker
run: |
echo "Listing top largest packages"
@@ -70,7 +56,7 @@ jobs:
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
@@ -166,7 +152,7 @@ jobs:
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
@@ -196,7 +182,7 @@ jobs:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
@@ -214,7 +200,11 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
make backends/llama-cpp-darwin
make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
./local-ai backends install "ocifile://$PWD/build/llama-cpp.tar"
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include

View File

@@ -9,7 +9,7 @@ jobs:
fail-fast: false
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 'stable'

2
.gitignore vendored
View File

@@ -24,7 +24,7 @@ go-bert
# LocalAI build binary
LocalAI
/local-ai
local-ai
# prevent above rules from omitting the helm chart
!charts/*
# prevent above rules from omitting the api/localai folder

View File

@@ -8,7 +8,7 @@ source:
enabled: true
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
builds:
- main: ./cmd/local-ai
-
env:
- CGO_ENABLED=0
ldflags:

View File

@@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates curl wget espeak-ng libgomp1 \
ffmpeg libopenblas-base libopenblas-dev && \
python3 python-is-python3 ffmpeg libopenblas-base libopenblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -18,7 +18,7 @@ FROM requirements AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=8
ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false
ARG TARGETARCH
ARG TARGETVARIANT
@@ -100,10 +100,6 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
ldconfig \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \
; fi
RUN expr "${BUILD_TYPE}" = intel && echo "intel" > /run/localai/capability || echo "not intel"
# Cuda

133
Makefile
View File

@@ -2,7 +2,6 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
LAUNCHER_BINARY_NAME=local-ai-launcher
GORELEASER?=
@@ -91,17 +90,7 @@ build: protogen-go install-go-tools ## Build the project
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
rm -rf $(BINARY_NAME) || true
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./cmd/local-ai
build-launcher: ## Build the launcher application
$(info ${GREEN}I local-ai launcher build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
rm -rf $(LAUNCHER_BINARY_NAME) || true
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(LAUNCHER_BINARY_NAME) ./cmd/launcher
build-all: build build-launcher ## Build both server and launcher
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
dev-dist:
$(GORELEASER) build --snapshot --clean
@@ -143,6 +132,39 @@ test: test-models/testmodel.ggml protogen-go
$(MAKE) test-tts
$(MAKE) test-stablediffusion
backends/diffusers: docker-build-diffusers docker-save-diffusers build
./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
backends/local-store: docker-build-local-store docker-save-local-store build
./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
backends/huggingface: docker-build-huggingface docker-save-huggingface build
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"
backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"
backends/kokoro: docker-build-kokoro docker-save-kokoro build
./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"
########################################################
## AIO tests
########################################################
@@ -170,7 +192,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=8 -t localai-tests .
docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -335,73 +357,6 @@ docker-image-intel:
## Backends
########################################################
backends/diffusers: docker-build-diffusers docker-save-diffusers build
./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
backends/local-store: docker-build-local-store docker-save-local-store build
./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
backends/huggingface: docker-build-huggingface docker-save-huggingface build
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"
backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"
backends/kokoro: docker-build-kokoro docker-save-kokoro build
./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"
backends/chatterbox: docker-build-chatterbox docker-save-chatterbox build
./local-ai backends install "ocifile://$(abspath ./backend-images/chatterbox.tar)"
backends/llama-cpp-darwin: build
bash ./scripts/build/llama-cpp-darwin.sh
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
build-darwin-python-backend: build
bash ./scripts/build/python-darwin.sh
build-darwin-go-backend: build
bash ./scripts/build/golang-darwin.sh
backends/mlx:
BACKEND=mlx $(MAKE) build-darwin-python-backend
./local-ai backends install "ocifile://$(abspath ./backend-images/mlx.tar)"
backends/diffuser-darwin:
BACKEND=diffusers $(MAKE) build-darwin-python-backend
./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
backends/mlx-vlm:
BACKEND=mlx-vlm $(MAKE) build-darwin-python-backend
./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-vlm.tar)"
backends/mlx-audio:
BACKEND=mlx-audio $(MAKE) build-darwin-python-backend
./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-audio.tar)"
backends/stablediffusion-ggml-darwin:
BACKEND=stablediffusion-ggml BUILD_TYPE=metal $(MAKE) build-darwin-go-backend
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
backend-images:
mkdir -p backend-images
@@ -496,7 +451,7 @@ docker-build-bark:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
docker-build-chatterbox:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
docker-build-exllama2:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
@@ -532,19 +487,3 @@ docs-clean:
.PHONY: docs
docs: docs/static/gallery.html
cd docs && hugo serve
########################################################
## Platform-specific builds
########################################################
## fyne cross-platform build
build-launcher-darwin: build-launcher
go run github.com/tiagomelo/macos-dmg-creator/cmd/createdmg@latest \
--appName "LocalAI" \
--appBinaryPath "$(LAUNCHER_BINARY_NAME)" \
--bundleIdentifier "com.localai.launcher" \
--iconPath "core/http/static/logo.png" \
--outputDir "dist/"
build-launcher-linux:
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv launcher.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz

View File

@@ -110,12 +110,6 @@ curl https://localai.io/install.sh | sh
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
### macOS Download:
<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>
Or run with docker:
### CPU only image:
@@ -197,7 +191,6 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti
## 📰 Latest project news
- August 2025: MLX, MLX-VLM, Diffusers and llama.cpp are now supported on Mac M1/M2/M3+ chips ( with `development` suffix in the gallery ): https://github.com/mudler/LocalAI/pull/6049 https://github.com/mudler/LocalAI/pull/6119 https://github.com/mudler/LocalAI/pull/6121 https://github.com/mudler/LocalAI/pull/6060
- July/August 2025: 🔍 [Object Detection](https://localai.io/features/object-detection/) added to the API featuring [rf-detr](https://github.com/roboflow/rf-detr)
- July 2025: All backends migrated outside of the main binary. LocalAI is now more lightweight, small, and automatically downloads the required backend to run the model. [Read the release notes](https://github.com/mudler/LocalAI/releases/tag/v3.2.0)
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
@@ -239,60 +232,6 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI!
## 🧩 Supported Backends & Acceleration
LocalAI supports a comprehensive range of AI backends with multiple acceleration options:
### Text Generation & Language Models
| Backend | Description | Acceleration Support |
|---------|-------------|---------------------|
| **llama.cpp** | LLM inference in C/C++ | CUDA 11/12, ROCm, Intel SYCL, Vulkan, Metal, CPU |
| **vLLM** | Fast LLM inference with PagedAttention | CUDA 12, ROCm, Intel |
| **transformers** | HuggingFace transformers framework | CUDA 11/12, ROCm, Intel, CPU |
| **exllama2** | GPTQ inference library | CUDA 12 |
| **MLX** | Apple Silicon LLM inference | Metal (M1/M2/M3+) |
| **MLX-VLM** | Apple Silicon Vision-Language Models | Metal (M1/M2/M3+) |
### Audio & Speech Processing
| Backend | Description | Acceleration Support |
|---------|-------------|---------------------|
| **whisper.cpp** | OpenAI Whisper in C/C++ | CUDA 12, ROCm, Intel SYCL, Vulkan, CPU |
| **faster-whisper** | Fast Whisper with CTranslate2 | CUDA 12, ROCm, Intel, CPU |
| **bark** | Text-to-audio generation | CUDA 12, ROCm, Intel |
| **bark-cpp** | C++ implementation of Bark | CUDA, Metal, CPU |
| **coqui** | Advanced TTS with 1100+ languages | CUDA 12, ROCm, Intel, CPU |
| **kokoro** | Lightweight TTS model | CUDA 12, ROCm, Intel, CPU |
| **chatterbox** | Production-grade TTS | CUDA 11/12, CPU |
| **piper** | Fast neural TTS system | CPU |
| **kitten-tts** | Kitten TTS models | CPU |
| **silero-vad** | Voice Activity Detection | CPU |
### Image & Video Generation
| Backend | Description | Acceleration Support |
|---------|-------------|---------------------|
| **stablediffusion.cpp** | Stable Diffusion in C/C++ | CUDA 12, Intel SYCL, Vulkan, CPU |
| **diffusers** | HuggingFace diffusion models | CUDA 11/12, ROCm, Intel, Metal, CPU |
### Specialized AI Tasks
| Backend | Description | Acceleration Support |
|---------|-------------|---------------------|
| **rfdetr** | Real-time object detection | CUDA 12, Intel, CPU |
| **rerankers** | Document reranking API | CUDA 11/12, ROCm, Intel, CPU |
| **local-store** | Vector database | CPU |
| **huggingface** | HuggingFace API integration | API-based |
### Hardware Acceleration Matrix
| Acceleration Type | Supported Backends | Hardware Support |
|-------------------|-------------------|------------------|
| **NVIDIA CUDA 11** | llama.cpp, whisper, stablediffusion, diffusers, rerankers, bark, chatterbox | Nvidia hardware |
| **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware |
| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark | AMD Graphics |
| **Intel oneAPI** | llama.cpp, whisper, stablediffusion, vllm, transformers, diffusers, rfdetr, rerankers, exllama2, coqui, kokoro, bark | Intel Arc, Intel iGPUs |
| **Apple Metal** | llama.cpp, whisper, diffusers, MLX, MLX-VLM, bark-cpp | Apple M1/M2/M3+ |
| **Vulkan** | llama.cpp, whisper, stablediffusion | Cross-platform GPUs |
| **NVIDIA Jetson** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr | ARM64 embedded AI |
| **CPU Optimized** | All backends | AVX/AVX2/AVX512, quantization support |
### 🔗 Community and integrations
@@ -307,9 +246,6 @@ WebUIs:
Model galleries
- https://github.com/go-skynet/model-gallery
Voice:
- https://github.com/richiejp/VoxInput
Other:
- Helm chart https://github.com/go-skynet/helm-charts
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin

View File

@@ -23,7 +23,7 @@ RUN apt-get update && \
libssl-dev \
git \
git-lfs \
unzip clang \
unzip \
upx-ucl \
curl python3-pip \
python-is-python3 \
@@ -116,7 +116,7 @@ COPY python/${BACKEND} /${BACKEND}
COPY backend.proto /${BACKEND}/backend.proto
COPY python/common/ /${BACKEND}/common
RUN cd /${BACKEND} && PORTABLE_PYTHON=true make
RUN cd /${BACKEND} && make
FROM scratch
ARG BACKEND=rerankers

View File

@@ -1,213 +0,0 @@
# LocalAI Backend Architecture
This directory contains the core backend infrastructure for LocalAI, including the gRPC protocol definition, multi-language Dockerfiles, and language-specific backend implementations.
## Overview
LocalAI uses a unified gRPC-based architecture that allows different programming languages to implement AI backends while maintaining consistent interfaces and capabilities. The backend system supports multiple hardware acceleration targets and provides a standardized way to integrate various AI models and frameworks.
## Architecture Components
### 1. Protocol Definition (`backend.proto`)
The `backend.proto` file defines the gRPC service interface that all backends must implement. This ensures consistency across different language implementations and provides a contract for communication between LocalAI core and backend services.
#### Core Services
- **Text Generation**: `Predict`, `PredictStream` for LLM inference
- **Embeddings**: `Embedding` for text vectorization
- **Image Generation**: `GenerateImage` for stable diffusion and image models
- **Audio Processing**: `AudioTranscription`, `TTS`, `SoundGeneration`
- **Video Generation**: `GenerateVideo` for video synthesis
- **Object Detection**: `Detect` for computer vision tasks
- **Vector Storage**: `StoresSet`, `StoresGet`, `StoresFind` for RAG operations
- **Reranking**: `Rerank` for document relevance scoring
- **Voice Activity Detection**: `VAD` for audio segmentation
#### Key Message Types
- **`PredictOptions`**: Comprehensive configuration for text generation
- **`ModelOptions`**: Model loading and configuration parameters
- **`Result`**: Standardized response format
- **`StatusResponse`**: Backend health and memory usage information
### 2. Multi-Language Dockerfiles
The backend system provides language-specific Dockerfiles that handle the build environment and dependencies for different programming languages:
- `Dockerfile.python`
- `Dockerfile.golang`
- `Dockerfile.llama-cpp`
### 3. Language-Specific Implementations
#### Python Backends (`python/`)
- **transformers**: Hugging Face Transformers framework
- **vllm**: High-performance LLM inference
- **mlx**: Apple Silicon optimization
- **diffusers**: Stable Diffusion models
- **Audio**: bark, coqui, faster-whisper, kitten-tts
- **Vision**: mlx-vlm, rfdetr
- **Specialized**: rerankers, chatterbox, kokoro
#### Go Backends (`go/`)
- **whisper**: OpenAI Whisper speech recognition in Go with GGML cpp backend (whisper.cpp)
- **stablediffusion-ggml**: Stable Diffusion in Go with GGML Cpp backend
- **huggingface**: Hugging Face model integration
- **piper**: Text-to-speech synthesis Golang with C bindings using rhaspy/piper
- **bark-cpp**: Bark TTS models Golang with Cpp bindings
- **local-store**: Vector storage backend
#### C++ Backends (`cpp/`)
- **llama-cpp**: Llama.cpp integration
- **grpc**: GRPC utilities and helpers
## Hardware Acceleration Support
### CUDA (NVIDIA)
- **Versions**: CUDA 11.x, 12.x
- **Features**: cuBLAS, cuDNN, TensorRT optimization
- **Targets**: x86_64, ARM64 (Jetson)
### ROCm (AMD)
- **Features**: HIP, rocBLAS, MIOpen
- **Targets**: AMD GPUs with ROCm support
### Intel
- **Features**: oneAPI, Intel Extension for PyTorch
- **Targets**: Intel GPUs, XPUs, CPUs
### Vulkan
- **Features**: Cross-platform GPU acceleration
- **Targets**: Windows, Linux, Android, macOS
### Apple Silicon
- **Features**: MLX framework, Metal Performance Shaders
- **Targets**: M1/M2/M3 Macs
## Backend Registry (`index.yaml`)
The `index.yaml` file serves as a central registry for all available backends, providing:
- **Metadata**: Name, description, license, icons
- **Capabilities**: Hardware targets and optimization profiles
- **Tags**: Categorization for discovery
- **URLs**: Source code and documentation links
## Building Backends
### Prerequisites
- Docker with multi-architecture support
- Appropriate hardware drivers (CUDA, ROCm, etc.)
- Build tools (make, cmake, compilers)
### Build Commands
Example of build commands with Docker
```bash
# Build Python backend
docker build -f backend/Dockerfile.python \
--build-arg BACKEND=transformers \
--build-arg BUILD_TYPE=cublas12 \
--build-arg CUDA_MAJOR_VERSION=12 \
--build-arg CUDA_MINOR_VERSION=8 \
-t localai-backend-transformers .
# Build Go backend
docker build -f backend/Dockerfile.golang \
--build-arg BACKEND=whisper \
--build-arg BUILD_TYPE=cpu \
-t localai-backend-whisper .
# Build C++ backend
docker build -f backend/Dockerfile.llama-cpp \
--build-arg BACKEND=llama-cpp \
--build-arg BUILD_TYPE=cublas12 \
-t localai-backend-llama-cpp .
```
For ARM64/Mac builds, docker can't be used, and the makefile in the respective backend has to be used.
### Build Types
- **`cpu`**: CPU-only optimization
- **`cublas11`**: CUDA 11.x with cuBLAS
- **`cublas12`**: CUDA 12.x with cuBLAS
- **`hipblas`**: ROCm with rocBLAS
- **`intel`**: Intel oneAPI optimization
- **`vulkan`**: Vulkan-based acceleration
- **`metal`**: Apple Metal optimization
## Backend Development
### Creating a New Backend
1. **Choose Language**: Select Python, Go, or C++ based on requirements
2. **Implement Interface**: Implement the gRPC service defined in `backend.proto`
3. **Add Dependencies**: Create appropriate requirements files
4. **Configure Build**: Set up Dockerfile and build scripts
5. **Register Backend**: Add entry to `index.yaml`
6. **Test Integration**: Verify gRPC communication and functionality
### Backend Structure
```
backend-name/
├── backend.py/go/cpp # Main implementation
├── requirements.txt # Dependencies
├── Dockerfile # Build configuration
├── install.sh # Installation script
├── run.sh # Execution script
├── test.sh # Test script
└── README.md # Backend documentation
```
### Required gRPC Methods
At minimum, backends must implement:
- `Health()` - Service health check
- `LoadModel()` - Model loading and initialization
- `Predict()` - Main inference endpoint
- `Status()` - Backend status and metrics
## Integration with LocalAI Core
Backends communicate with LocalAI core through gRPC:
1. **Service Discovery**: Core discovers available backends
2. **Model Loading**: Core requests model loading via `LoadModel`
3. **Inference**: Core sends requests via `Predict` or specialized endpoints
4. **Streaming**: Core handles streaming responses for real-time generation
5. **Monitoring**: Core tracks backend health and performance
## Performance Optimization
### Memory Management
- **Model Caching**: Efficient model loading and caching
- **Batch Processing**: Optimize for multiple concurrent requests
- **Memory Pinning**: GPU memory optimization for CUDA/ROCm
### Hardware Utilization
- **Multi-GPU**: Support for tensor parallelism
- **Mixed Precision**: FP16/BF16 for memory efficiency
- **Kernel Fusion**: Optimized CUDA/ROCm kernels
## Troubleshooting
### Common Issues
1. **GRPC Connection**: Verify backend service is running and accessible
2. **Model Loading**: Check model paths and dependencies
3. **Hardware Detection**: Ensure appropriate drivers and libraries
4. **Memory Issues**: Monitor GPU memory usage and model sizes
## Contributing
When contributing to the backend system:
1. **Follow Protocol**: Implement the exact gRPC interface
2. **Add Tests**: Include comprehensive test coverage
3. **Document**: Provide clear usage examples
4. **Optimize**: Consider performance and resource usage
5. **Validate**: Test across different hardware targets

View File

@@ -242,7 +242,7 @@ message ModelOptions {
string Type = 49;
string FlashAttention = 56;
bool FlashAttention = 56;
bool NoKVOffload = 57;
string ModelPath = 59;
@@ -276,7 +276,6 @@ message TranscriptRequest {
string language = 3;
uint32 threads = 4;
bool translate = 5;
bool diarize = 6;
}
message TranscriptResult {
@@ -306,24 +305,22 @@ message GenerateImageRequest {
// Diffusers
string EnableParameters = 10;
int32 CLIPSkip = 11;
// Reference images for models that support them (e.g., Flux Kontext)
repeated string ref_images = 12;
}
message GenerateVideoRequest {
string prompt = 1;
string negative_prompt = 2; // Negative prompt for video generation
string start_image = 3; // Path or base64 encoded image for the start frame
string end_image = 4; // Path or base64 encoded image for the end frame
int32 width = 5;
int32 height = 6;
int32 num_frames = 7; // Number of frames to generate
int32 fps = 8; // Frames per second
int32 seed = 9;
float cfg_scale = 10; // Classifier-free guidance scale
int32 step = 11; // Number of inference steps
string dst = 12; // Output path for the generated video
string start_image = 2; // Path or base64 encoded image for the start frame
string end_image = 3; // Path or base64 encoded image for the end frame
int32 width = 4;
int32 height = 5;
int32 num_frames = 6; // Number of frames to generate
int32 fps = 7; // Frames per second
int32 seed = 8;
float cfg_scale = 9; // Classifier-free guidance scale
string dst = 10; // Output path for the generated video
}
message TTSRequest {

View File

@@ -1,6 +1,6 @@
LLAMA_VERSION?=fe4eb4f8ec25a1239b0923f1c7f87adf5730c3e5
LLAMA_REPO?=https://github.com/JohannesGaessler/llama.cpp
LLAMA_VERSION?=a0552c8beef74e843bb085c8ef0c63f9ed7a2b27
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=
BUILD_TYPE?=
@@ -32,8 +32,10 @@ else ifeq ($(BUILD_TYPE),hipblas)
ROCM_PATH ?= /opt/rocm
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
# GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
# AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON
# CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
else ifeq ($(OS),Darwin)

View File

@@ -53,9 +53,9 @@ static void start_llama_server(server_context& ctx_server) {
LOG_INF("%s: model loaded\n", __func__);
// print sample chat example to make it clear which template is used
// LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
// common_chat_templates_source(ctx_server.chat_templates.get()),
// common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str(), ctx_server.params_base.default_template_kwargs);
LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
common_chat_templates_source(ctx_server.chat_templates.get()),
common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str());
// Reset the chat templates
// TODO: We should make this configurable by respecting the option that is already present in LocalAI for vLLM
@@ -304,15 +304,7 @@ static void params_parse(const backend::ModelOptions* request,
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();
if (request->flashattention() == "on" || request->flashattention() == "enabled") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
} else if (request->flashattention() == "off" || request->flashattention() == "disabled") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
} else if (request->flashattention() == "auto") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
}
params.flash_attn = request->flashattention();
params.no_kv_offload = request->nokvoffload();
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
@@ -445,7 +437,24 @@ public:
}
}
// process files
mtmd::bitmaps bitmaps;
const bool has_mtmd = ctx_server.mctx != nullptr;
{
if (!has_mtmd && !files.empty()) {
throw std::runtime_error("This server does not support multimodal");
}
for (auto & file : files) {
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(ctx_server.mctx, file.data(), file.size()));
if (!bmp.ptr) {
throw std::runtime_error("Failed to load image/audio");
}
// calculate bitmap hash (for KV caching)
std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
bmp.set_id(hash.c_str());
bitmaps.entries.push_back(std::move(bmp));
}
}
// process prompt
std::vector<server_tokens> inputs;
@@ -455,10 +464,32 @@ public:
if (has_mtmd) {
// multimodal
inputs.push_back(process_mtmd_prompt(ctx_server.mctx, prompt.get<std::string>(), files));
std::string prompt_str = prompt.get<std::string>();
mtmd_input_text inp_txt = {
prompt_str.c_str(),
/* add_special */ true,
/* parse_special */ true,
};
mtmd::input_chunks chunks(mtmd_input_chunks_init());
auto bitmaps_c_ptr = bitmaps.c_ptr();
int32_t tokenized = mtmd_tokenize(ctx_server.mctx,
chunks.ptr.get(),
&inp_txt,
bitmaps_c_ptr.data(),
bitmaps_c_ptr.size());
if (tokenized != 0) {
throw std::runtime_error("Failed to tokenize prompt");
}
server_tokens tmp(chunks, true);
inputs.push_back(std::move(tmp));
} else {
// Everything else, including multimodal completions.
inputs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
// non-multimodal version
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
for (auto & p : tokenized_prompts) {
auto tmp = server_tokens(p, ctx_server.mctx != nullptr);
inputs.push_back(std::move(tmp));
}
}
tasks.reserve(inputs.size());
@@ -599,7 +630,23 @@ public:
}
// process files
mtmd::bitmaps bitmaps;
const bool has_mtmd = ctx_server.mctx != nullptr;
{
if (!has_mtmd && !files.empty()) {
throw std::runtime_error("This server does not support multimodal");
}
for (auto & file : files) {
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(ctx_server.mctx, file.data(), file.size()));
if (!bmp.ptr) {
throw std::runtime_error("Failed to load image/audio");
}
// calculate bitmap hash (for KV caching)
std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
bmp.set_id(hash.c_str());
bitmaps.entries.push_back(std::move(bmp));
}
}
// process prompt
std::vector<server_tokens> inputs;
@@ -610,10 +657,33 @@ public:
if (has_mtmd) {
// multimodal
inputs.push_back(process_mtmd_prompt(ctx_server.mctx, prompt.get<std::string>(), files));
std::string prompt_str = prompt.get<std::string>();
mtmd_input_text inp_txt = {
prompt_str.c_str(),
/* add_special */ true,
/* parse_special */ true,
};
mtmd::input_chunks chunks(mtmd_input_chunks_init());
auto bitmaps_c_ptr = bitmaps.c_ptr();
int32_t tokenized = mtmd_tokenize(ctx_server.mctx,
chunks.ptr.get(),
&inp_txt,
bitmaps_c_ptr.data(),
bitmaps_c_ptr.size());
if (tokenized != 0) {
std::cout << "[PREDICT] Failed to tokenize prompt" << std::endl;
throw std::runtime_error("Failed to tokenize prompt");
}
server_tokens tmp(chunks, true);
inputs.push_back(std::move(tmp));
} else {
// Everything else, including multimodal completions.
inputs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
// non-multimodal version
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
for (auto & p : tokenized_prompts) {
auto tmp = server_tokens(p, ctx_server.mctx != nullptr);
inputs.push_back(std::move(tmp));
}
}
tasks.reserve(inputs.size());
@@ -704,7 +774,7 @@ public:
json prompt = body.at("prompt");
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
for (const auto & tokens : tokenized_prompts) {
// this check is necessary for models that do not add BOS token to the input
if (tokens.empty()) {
@@ -723,7 +793,7 @@ public:
task.id = ctx_server.queue_tasks.get_new_id();
task.index = i;
task.prompt_tokens = std::move(tokenized_prompts[i]);
task.prompt_tokens = server_tokens(tokenized_prompts[i], ctx_server.mctx != nullptr);
// OAI-compat
task.params.oaicompat = OAICOMPAT_TYPE_EMBEDDING;
@@ -779,10 +849,8 @@ public:
}
// Tokenize the query
auto tokenized_query = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, request->query(), /* add_special */ false, true);
if (tokenized_query.size() != 1) {
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"query\" must contain only a single prompt");
}
llama_tokens tokenized_query = tokenize_input_prompts(ctx_server.vocab, request->query(), /* add_special */ false, true)[0];
// Create and queue the task
json responses = json::array();
bool error = false;
@@ -794,14 +862,14 @@ public:
documents.push_back(request->documents(i));
}
auto tokenized_docs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, documents, /* add_special */ false, true);
auto tokenized_docs = tokenize_input_prompts(ctx_server.vocab, documents, /* add_special */ false, true);
tasks.reserve(tokenized_docs.size());
for (size_t i = 0; i < tokenized_docs.size(); i++) {
auto tmp = format_rerank(ctx_server.vocab, tokenized_query[0], tokenized_docs[i]);
auto tmp = format_rerank(ctx_server.vocab, tokenized_query, tokenized_docs[i]);
server_task task = server_task(SERVER_TASK_TYPE_RERANK);
task.id = ctx_server.queue_tasks.get_new_id();
task.index = i;
task.prompt_tokens = std::move(tmp);
task.prompt_tokens = server_tokens(tmp, ctx_server.mctx != nullptr);
tasks.push_back(std::move(task));
}

View File

@@ -42,8 +42,7 @@ fi
# Extend ld library path with the dir where this script is located/lib
if [ "$(uname)" == "Darwin" ]; then
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
#export DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
else
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
@@ -58,5 +57,5 @@ fi
echo "Using binary: $BINARY"
exec $CURDIR/$BINARY "$@"
# We should never reach this point, however just in case we do, run fallback
# In case we fail execing, just run fallback
exec $CURDIR/llama-cpp-fallback "$@"

View File

@@ -1,6 +0,0 @@
package/
sources/
.cache/
build/
libgosd.so
stablediffusion-ggml

View File

@@ -1,20 +0,0 @@
cmake_minimum_required(VERSION 3.12)
project(gosd LANGUAGES C CXX)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(./sources/stablediffusion-ggml.cpp)
add_library(gosd MODULE gosd.cpp)
target_link_libraries(gosd PRIVATE stable-diffusion ggml)
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
target_link_libraries(gosd PRIVATE stdc++fs)
endif()
target_include_directories(gosd PUBLIC
stable-diffusion.cpp
stable-diffusion.cpp/thirdparty
)
set_property(TARGET gosd PROPERTY CXX_STANDARD 17)
set_target_properties(gosd PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

View File

@@ -1,16 +1,28 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)
AR?=ar
CMAKE_ARGS?=
BUILD_TYPE?=
NATIVE?=false
CUDA_LIBPATH?=/usr/local/cuda/lib64/
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
GOCMD?=go
CGO_LDFLAGS?=
# Avoid parent make file overwriting CGO_LDFLAGS which is needed for hipblas
CGO_LDFLAGS_SYCL=
GO_TAGS?=
JOBS?=$(shell nproc --ignore=1)
LD_FLAGS?=
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=b0179181069254389ccad604e44f17a2c25b4094
STABLEDIFFUSION_GGML_VERSION?=5900ef6605c6fbf7934239f795c13c97bc993853
CMAKE_ARGS+=-DGGML_MAX_NAME=128
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DGGML_MAX_NAME=128 -DSD_USE_SYSTEM_GGML=OFF
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
@@ -19,6 +31,7 @@ endif
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DSD_CUDA=ON -DGGML_CUDA=ON
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
@@ -29,14 +42,18 @@ else ifeq ($(BUILD_TYPE),clblas)
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON
CGO_LDFLAGS+=-lvulkan
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DSD_METAL=OFF -DGGML_METAL=OFF
else
CMAKE_ARGS+=-DSD_METAL=ON -DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
@@ -46,6 +63,12 @@ ifeq ($(BUILD_TYPE),sycl_f16)
-DCMAKE_CXX_COMPILER=icpx \
-DSD_SYCL=ON \
-DGGML_SYCL_F16=ON
export CC=icx
export CXX=icpx
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif
ifeq ($(BUILD_TYPE),sycl_f32)
@@ -53,29 +76,78 @@ ifeq ($(BUILD_TYPE),sycl_f32)
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DSD_SYCL=ON
export CC=icx
export CXX=icpx
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif
# warnings
# CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
# Find all .a archives in ARCHIVE_DIR
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
GGML_ARCHIVE_DIR := build/ggml/src/
ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
ALL_OBJS := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.o')
# Name of the single merged library
COMBINED_LIB := libggmlall.a
# Instead of using the archives generated by GGML, use the object files directly to avoid overwriting objects with the same base name
$(COMBINED_LIB): $(ALL_ARCHIVES)
@echo "Merging all .o into $(COMBINED_LIB): $(ALL_OBJS)"
rm -f $@
ar -qc $@ $(ALL_OBJS)
# Ensure we have a proper index
ranlib $@
build/libstable-diffusion.a:
@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) ../sources/stablediffusion-ggml.cpp && \
cmake --build . --config Release"
else
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) ../sources/stablediffusion-ggml.cpp && \
cmake --build . --config Release
endif
$(MAKE) $(COMBINED_LIB)
gosd.o:
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c"
else
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
endif
## stablediffusion (ggml)
sources/stablediffusion-ggml.cpp:
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
cd sources/stablediffusion-ggml.cpp && \
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
libgosd.so: sources/stablediffusion-ggml.cpp CMakeLists.txt gosd.cpp gosd.h
mkdir -p build && \
cd build && \
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
mv build/libgosd.so ./
libsd.a: sources/stablediffusion-ggml.cpp build/libstable-diffusion.a gosd.o
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
$(AR) rcs libsd.a gosd.o
stablediffusion-ggml: main.go gosd.go libgosd.so
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o stablediffusion-ggml ./
stablediffusion-ggml: libsd.a
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o stablediffusion-ggml ./
package: stablediffusion-ggml
package:
bash package.sh
build: package
build: stablediffusion-ggml package
clean:
rm -rf libgosd.so build stablediffusion-ggml package sources
rm -rf gosd.o libsd.a build $(COMBINED_LIB)

View File

@@ -1,4 +1,3 @@
#include <cstdint>
#define GGML_MAX_NAME 128
#include <stdio.h>
@@ -44,7 +43,7 @@ const char* sample_method_str[] = {
};
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
const char* schedulers[] = {
const char* schedule_str[] = {
"default",
"discrete",
"karras",
@@ -54,13 +53,11 @@ const char* schedulers[] = {
};
sd_ctx_t* sd_c;
// Moved from the context (load time) to generation time params
scheduler_t scheduler = scheduler_t::DEFAULT;
sample_method_t sample_method;
// Copied from the upstream CLI
static void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
//SDParams* params = (SDParams*)data;
const char* level_str;
@@ -91,33 +88,33 @@ static void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
fflush(stderr);
}
int load_model(const char *model, char *model_path, char* options[], int threads, int diff) {
fprintf (stderr, "Loading model: %p=%s\n", model, model);
int load_model(char *model, char *model_path, char* options[], int threads, int diff) {
fprintf (stderr, "Loading model!\n");
sd_set_log_callback(sd_log_cb, NULL);
const char *stableDiffusionModel = "";
char *stableDiffusionModel = "";
if (diff == 1 ) {
stableDiffusionModel = model;
model = "";
}
// decode options. Options are in form optname:optvale, or if booleans only optname.
const char *clip_l_path = "";
const char *clip_g_path = "";
const char *t5xxl_path = "";
const char *vae_path = "";
const char *scheduler_str = "";
const char *sampler = "";
char *clip_l_path = "";
char *clip_g_path = "";
char *t5xxl_path = "";
char *vae_path = "";
char *scheduler = "";
char *sampler = "";
char *lora_dir = model_path;
bool lora_dir_allocated = false;
fprintf(stderr, "parsing options: %p\n", options);
fprintf(stderr, "parsing options\n");
// If options is not NULL, parse options
for (int i = 0; options[i] != NULL; i++) {
const char *optname = strtok(options[i], ":");
const char *optval = strtok(NULL, ":");
char *optname = strtok(options[i], ":");
char *optval = strtok(NULL, ":");
if (optval == NULL) {
optval = "true";
}
@@ -135,7 +132,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
vae_path = optval;
}
if (!strcmp(optname, "scheduler")) {
scheduler_str = optval;
scheduler = optval;
}
if (!strcmp(optname, "sampler")) {
sampler = optval;
@@ -150,8 +147,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
lora_dir_allocated = true;
fprintf(stderr, "Lora dir resolved to: %s\n", lora_dir);
} else {
lora_dir = strdup(optval);
lora_dir_allocated = true;
lora_dir = optval;
fprintf(stderr, "No model path provided, using lora dir as-is: %s\n", lora_dir);
}
}
@@ -172,13 +168,22 @@ int load_model(const char *model, char *model_path, char* options[], int threads
}
sample_method = (sample_method_t)sample_method_found;
int schedule_found = -1;
for (int d = 0; d < SCHEDULE_COUNT; d++) {
if (!strcmp(scheduler_str, schedulers[d])) {
scheduler = (scheduler_t)d;
fprintf (stderr, "Found scheduler: %s\n", scheduler_str);
if (!strcmp(scheduler, schedule_str[d])) {
schedule_found = d;
fprintf (stderr, "Found scheduler: %s\n", scheduler);
}
}
if (schedule_found == -1) {
fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
schedule_found = DEFAULT;
}
schedule_t schedule = (schedule_t)schedule_found;
fprintf (stderr, "Creating context\n");
sd_ctx_params_t ctx_params;
sd_ctx_params_init(&ctx_params);
@@ -198,6 +203,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
ctx_params.free_params_immediately = false;
ctx_params.n_threads = threads;
ctx_params.rng_type = STD_DEFAULT_RNG;
ctx_params.schedule = schedule;
sd_ctx_t* sd_ctx = new_sd_ctx(&ctx_params);
if (sd_ctx == NULL) {
@@ -220,7 +226,7 @@ int load_model(const char *model, char *model_path, char* options[], int threads
return 0;
}
int gen_image(char *text, char *negativeText, int width, int height, int steps, int64_t seed, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count) {
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count) {
sd_image_t* results;
@@ -233,28 +239,27 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
p.prompt = text;
p.negative_prompt = negativeText;
p.sample_params.guidance.txt_cfg = cfg_scale;
p.sample_params.guidance.slg.layers = skip_layers.data();
p.sample_params.guidance.slg.layer_count = skip_layers.size();
p.guidance.txt_cfg = cfg_scale;
p.guidance.slg.layers = skip_layers.data();
p.guidance.slg.layer_count = skip_layers.size();
p.width = width;
p.height = height;
p.sample_params.sample_method = sample_method;
p.sample_params.sample_steps = steps;
p.sample_method = sample_method;
p.sample_steps = steps;
p.seed = seed;
p.input_id_images_path = "";
p.sample_params.scheduler = scheduler;
// Handle input image for img2img
bool has_input_image = (src_image != NULL && strlen(src_image) > 0);
bool has_mask_image = (mask_image != NULL && strlen(mask_image) > 0);
uint8_t* input_image_buffer = NULL;
uint8_t* mask_image_buffer = NULL;
std::vector<uint8_t> default_mask_image_vec;
if (has_input_image) {
fprintf(stderr, "Loading input image: %s\n", src_image);
int c = 0;
int img_width = 0;
int img_height = 0;
@@ -268,29 +273,29 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
free(input_image_buffer);
return 1;
}
// Resize input image if dimensions don't match
if (img_width != width || img_height != height) {
fprintf(stderr, "Resizing input image from %dx%d to %dx%d\n", img_width, img_height, width, height);
uint8_t* resized_image_buffer = (uint8_t*)malloc(height * width * 3);
if (resized_image_buffer == NULL) {
fprintf(stderr, "Failed to allocate memory for resized image\n");
free(input_image_buffer);
return 1;
}
stbir_resize(input_image_buffer, img_width, img_height, 0,
resized_image_buffer, width, height, 0, STBIR_TYPE_UINT8,
3, STBIR_ALPHA_CHANNEL_NONE, 0,
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
STBIR_COLORSPACE_SRGB, nullptr);
free(input_image_buffer);
input_image_buffer = resized_image_buffer;
}
p.init_image = {(uint32_t)width, (uint32_t)height, 3, input_image_buffer};
p.strength = strength;
fprintf(stderr, "Using img2img with strength: %.2f\n", strength);
@@ -299,11 +304,11 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
p.init_image = {(uint32_t)width, (uint32_t)height, 3, NULL};
p.strength = 0.0f;
}
// Handle mask image for inpainting
if (has_mask_image) {
fprintf(stderr, "Loading mask image: %s\n", mask_image);
int c = 0;
int mask_width = 0;
int mask_height = 0;
@@ -313,11 +318,11 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
if (input_image_buffer) free(input_image_buffer);
return 1;
}
// Resize mask if dimensions don't match
if (mask_width != width || mask_height != height) {
fprintf(stderr, "Resizing mask image from %dx%d to %dx%d\n", mask_width, mask_height, width, height);
uint8_t* resized_mask_buffer = (uint8_t*)malloc(height * width);
if (resized_mask_buffer == NULL) {
fprintf(stderr, "Failed to allocate memory for resized mask\n");
@@ -325,18 +330,18 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
if (input_image_buffer) free(input_image_buffer);
return 1;
}
stbir_resize(mask_image_buffer, mask_width, mask_height, 0,
resized_mask_buffer, width, height, 0, STBIR_TYPE_UINT8,
1, STBIR_ALPHA_CHANNEL_NONE, 0,
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
STBIR_COLORSPACE_SRGB, nullptr);
free(mask_image_buffer);
mask_image_buffer = resized_mask_buffer;
}
p.mask_image = {(uint32_t)width, (uint32_t)height, 1, mask_image_buffer};
fprintf(stderr, "Using inpainting with mask\n");
} else {
@@ -348,17 +353,17 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
// Handle reference images
std::vector<sd_image_t> ref_images_vec;
std::vector<uint8_t*> ref_image_buffers;
if (ref_images_count > 0 && ref_images != NULL) {
fprintf(stderr, "Loading %d reference images\n", ref_images_count);
for (int i = 0; i < ref_images_count; i++) {
if (ref_images[i] == NULL || strlen(ref_images[i]) == 0) {
continue;
}
fprintf(stderr, "Loading reference image %d: %s\n", i + 1, ref_images[i]);
int c = 0;
int ref_width = 0;
int ref_height = 0;
@@ -372,33 +377,33 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
free(ref_image_buffer);
continue;
}
// Resize reference image if dimensions don't match
if (ref_width != width || ref_height != height) {
fprintf(stderr, "Resizing reference image from %dx%d to %dx%d\n", ref_width, ref_height, width, height);
uint8_t* resized_ref_buffer = (uint8_t*)malloc(height * width * 3);
if (resized_ref_buffer == NULL) {
fprintf(stderr, "Failed to allocate memory for resized reference image\n");
free(ref_image_buffer);
continue;
}
stbir_resize(ref_image_buffer, ref_width, ref_height, 0,
resized_ref_buffer, width, height, 0, STBIR_TYPE_UINT8,
3, STBIR_ALPHA_CHANNEL_NONE, 0,
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
STBIR_COLORSPACE_SRGB, nullptr);
free(ref_image_buffer);
ref_image_buffer = resized_ref_buffer;
}
ref_image_buffers.push_back(ref_image_buffer);
ref_images_vec.push_back({(uint32_t)width, (uint32_t)height, 3, ref_image_buffer});
}
if (!ref_images_vec.empty()) {
p.ref_images = ref_images_vec.data();
p.ref_images_count = ref_images_vec.size();
@@ -449,12 +454,12 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
for (auto buffer : ref_image_buffers) {
if (buffer) free(buffer);
}
fprintf (stderr, "gen_image is done: %s", dst);
fprintf (stderr, "gen_image is done", dst);
return 0;
}
int unload() {
free_sd_ctx(sd_c);
return 0;
}

View File

@@ -1,10 +1,15 @@
package main
// #cgo CXXFLAGS: -I${SRCDIR}/sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/sources/stablediffusion-ggml.cpp -I${SRCDIR}/sources/stablediffusion-ggml.cpp/ggml/include
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
// #include <gosd.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"unsafe"
@@ -20,34 +25,25 @@ type SDGGML struct {
cfgScale float32
}
var (
LoadModel func(model, model_apth string, options []uintptr, threads int32, diff int) int
GenImage func(text, negativeText string, width, height, steps int, seed int64, dst string, cfgScale float32, srcImage string, strength float32, maskImage string, refImages []string, refImagesCount int) int
)
// Copied from Purego internal/strings
// TODO: We should upstream sending []string
func hasSuffix(s, suffix string) bool {
return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
}
func CString(name string) *byte {
if hasSuffix(name, "\x00") {
return &(*(*[]byte)(unsafe.Pointer(&name)))[0]
}
b := make([]byte, len(name)+1)
copy(b, name)
return &b[0]
}
func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
sd.threads = int(opts.Threads)
modelPath := opts.ModelPath
modelFile := opts.ModelFile
modelPathC := modelPath
modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))
modelPathC := C.CString(modelPath)
defer C.free(unsafe.Pointer(modelPathC))
var options **C.char
// prepare the options array to pass to C
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
length := C.size_t(len(opts.Options))
options = (**C.char)(C.malloc((length + 1) * size))
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0 : len(opts.Options)+1 : len(opts.Options)+1]
var diffusionModel int
@@ -72,55 +68,81 @@ func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
// At the time of writing Purego doesn't recurse into slices and convert Go strings to pointers so we need to do that
var keepAlive []any
options := make([]uintptr, len(oo), len(oo)+1)
for i, op := range oo {
bytep := CString(op)
options[i] = uintptr(unsafe.Pointer(bytep))
keepAlive = append(keepAlive, bytep)
for i, x := range oo {
view[i] = C.CString(x)
}
view[len(oo)] = nil
sd.cfgScale = opts.CFGScale
ret := LoadModel(modelFile, modelPathC, options, opts.Threads, diffusionModel)
ret := C.load_model(modelFile, modelPathC, options, C.int(opts.Threads), C.int(diffusionModel))
if ret != 0 {
return fmt.Errorf("could not load model")
}
runtime.KeepAlive(keepAlive)
return nil
}
func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
t := opts.PositivePrompt
dst := opts.Dst
negative := opts.NegativePrompt
srcImage := opts.Src
t := C.CString(opts.PositivePrompt)
defer C.free(unsafe.Pointer(t))
var maskImage string
dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))
negative := C.CString(opts.NegativePrompt)
defer C.free(unsafe.Pointer(negative))
// Handle source image path
var srcImage *C.char
if opts.Src != "" {
srcImage = C.CString(opts.Src)
defer C.free(unsafe.Pointer(srcImage))
}
// Handle mask image path
var maskImage *C.char
if opts.EnableParameters != "" {
// Parse EnableParameters for mask path if provided
// This is a simple approach - in a real implementation you might want to parse JSON
if strings.Contains(opts.EnableParameters, "mask:") {
parts := strings.Split(opts.EnableParameters, "mask:")
if len(parts) > 1 {
maskPath := strings.TrimSpace(parts[1])
if maskPath != "" {
maskImage = maskPath
maskImage = C.CString(maskPath)
defer C.free(unsafe.Pointer(maskImage))
}
}
}
}
refImagesCount := len(opts.RefImages)
refImages := make([]string, refImagesCount, refImagesCount+1)
copy(refImages, opts.RefImages)
*(*uintptr)(unsafe.Add(unsafe.Pointer(&refImages), refImagesCount)) = 0
// Handle reference images
var refImages **C.char
var refImagesCount C.int
if len(opts.RefImages) > 0 {
refImagesCount = C.int(len(opts.RefImages))
// Allocate array of C strings
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
refImages = (**C.char)(C.malloc((C.size_t(len(opts.RefImages)) + 1) * size))
view := (*[1 << 30]*C.char)(unsafe.Pointer(refImages))[0 : len(opts.RefImages)+1 : len(opts.RefImages)+1]
for i, refImagePath := range opts.RefImages {
view[i] = C.CString(refImagePath)
defer C.free(unsafe.Pointer(view[i]))
}
view[len(opts.RefImages)] = nil
}
// Default strength for img2img (0.75 is a good default)
strength := float32(0.75)
strength := C.float(0.75)
if opts.Src != "" {
// If we have a source image, use img2img mode
// You could also parse strength from EnableParameters if needed
strength = C.float(0.75)
}
ret := GenImage(t, negative, int(opts.Width), int(opts.Height), int(opts.Step), int64(opts.Seed), dst, sd.cfgScale, srcImage, strength, maskImage, refImages, refImagesCount)
ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale), srcImage, strength, maskImage, refImages, refImagesCount)
if ret != 0 {
return fmt.Errorf("inference failed")
}

View File

@@ -1,8 +1,8 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(const char *model, char *model_path, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int64_t seed, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count);
int load_model(char *model, char *model_path, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -1,9 +1,9 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
@@ -12,14 +12,6 @@ var (
)
func main() {
gosd, err := purego.Dlopen("./libgosd.so", purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
panic(err)
}
purego.RegisterLibFunc(&LoadModel, gosd, "load_model")
purego.RegisterLibFunc(&GenImage, gosd, "gen_image")
flag.Parse()
if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {

View File

@@ -10,9 +10,8 @@ CURDIR=$(dirname "$(realpath $0)")
# Create lib directory
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/libgosd.so $CURDIR/package/
cp -avf $CURDIR/stablediffusion-ggml $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/
cp -avrf $CURDIR/stablediffusion-ggml $CURDIR/package/
cp -rfv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
@@ -43,13 +42,11 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
elif [ $(uname -s) = "Darwin" ]; then
echo "Detected Darwin"
else
echo "Error: Could not detect architecture"
exit 1
fi
echo "Packaging completed successfully"
echo "Packaging completed successfully"
ls -liah $CURDIR/package/
ls -liah $CURDIR/package/lib/
ls -liah $CURDIR/package/lib/

View File

@@ -1,7 +0,0 @@
.cache/
sources/
build/
package/
whisper
libgowhisper.so

View File

@@ -1,16 +0,0 @@
cmake_minimum_required(VERSION 3.12)
project(gowhisper LANGUAGES C CXX)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
add_subdirectory(./sources/whisper.cpp)
add_library(gowhisper MODULE gowhisper.cpp)
target_link_libraries(gowhisper PRIVATE whisper ggml)
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
target_link_libraries(gosd PRIVATE stdc++fs)
endif()
set_property(TARGET gowhisper PROPERTY CXX_STANDARD 17)
set_target_properties(gowhisper PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

View File

@@ -1,53 +1,110 @@
CMAKE_ARGS?=
BUILD_TYPE?=
GOCMD=go
NATIVE?=false
GOCMD?=go
GO_TAGS?=
JOBS?=$(shell nproc --ignore=1)
BUILD_TYPE?=
CMAKE_ARGS?=
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=edea8a9c3cf0eb7676dcdb604991eb2f95c3d984
WHISPER_CPP_VERSION?=4245c77b654cd384ad9f53a4a302be716b3e5861
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
CUDA_LIBPATH?=/usr/local/cuda/lib64/
ONEAPI_VERSION?=2025.2
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
CMAKE_ARGS+=-DGGML_CUDA=ON
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
export STABLE_BUILD_TYPE=
export CXX=$(ROCM_HOME)/llvm/bin/clang++
export CC=$(ROCM_HOME)/llvm/bin/clang
# GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
# AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib -L$(CURRENT_MAKEFILE_DIR)/sources/whisper.cpp/build/ggml/src/ggml-hip/ -lggml-hip
# CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
else ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=ON
CMAKE_ARGS+=-DGGML_VULKAN=1
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
else ifeq ($(OS),Darwin)
ifeq ($(BUILD_TYPE),)
BUILD_TYPE=metal
endif
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
CGO_LDFLAGS_WHISPER+=-lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
else
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
CMAKE_ARGS+=-DGGML_OPENMP=OFF
CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
CGO_LDFLAGS += -framework Accelerate
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
endif
TARGET+=--target ggml-metal
endif
ifeq ($(BUILD_TYPE),sycl_f16)
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export CC=icx
export CXX=icpx
CGO_LDFLAGS_WHISPER += -fsycl -L${DNNLROOT}/lib -rpath ${ONEAPI_ROOT}/${ONEAPI_VERSION}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL -lggml-sycl
CGO_LDFLAGS_WHISPER += $(shell pkg-config --libs mkl-static-lp64-gomp)
CGO_CXXFLAGS_WHISPER += -fiopenmp -fopenmp-targets=spir64
CGO_CXXFLAGS_WHISPER += $(shell pkg-config --cflags mkl-static-lp64-gomp )
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-sycl/
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
-DGGML_SYCL_F16=ON
-DCMAKE_CXX_FLAGS="-fsycl"
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
endif
ifneq ($(OS),Darwin)
CGO_LDFLAGS_WHISPER+=-lgomp
endif
## whisper
sources/whisper.cpp:
mkdir -p sources/whisper.cpp
cd sources/whisper.cpp && \
@@ -57,21 +114,18 @@ sources/whisper.cpp:
git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
libgowhisper.so: sources/whisper.cpp CMakeLists.txt gowhisper.cpp gowhisper.h
mkdir -p build && \
cd build && \
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
mv build/libgowhisper.so ./
sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && cmake $(CMAKE_ARGS) $(WHISPER_CMAKE_ARGS) . -B ./build
cd sources/whisper.cpp/build && cmake --build . --config Release
whisper: main.go gowhisper.go libgowhisper.so
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o whisper ./
whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
CGO_CXXFLAGS="$(CGO_CXXFLAGS_WHISPER)" \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o whisper ./
package: whisper
package:
bash package.sh
build: package
clean:
rm -rf libgowhisper.o build whisper
build: whisper package

View File

@@ -1,154 +0,0 @@
#include "gowhisper.h"
#include "ggml-backend.h"
#include "whisper.h"
#include <vector>
static struct whisper_vad_context *vctx;
static struct whisper_context *ctx;
static std::vector<float> flat_segs;
static void ggml_log_cb(enum ggml_log_level level, const char *log,
void *data) {
const char *level_str;
if (!log) {
return;
}
switch (level) {
case GGML_LOG_LEVEL_DEBUG:
level_str = "DEBUG";
break;
case GGML_LOG_LEVEL_INFO:
level_str = "INFO";
break;
case GGML_LOG_LEVEL_WARN:
level_str = "WARN";
break;
case GGML_LOG_LEVEL_ERROR:
level_str = "ERROR";
break;
default: /* Potential future-proofing */
level_str = "?????";
break;
}
fprintf(stderr, "[%-5s] ", level_str);
fputs(log, stderr);
fflush(stderr);
}
int load_model(const char *const model_path) {
whisper_log_set(ggml_log_cb, nullptr);
ggml_backend_load_all();
struct whisper_context_params cparams = whisper_context_default_params();
ctx = whisper_init_from_file_with_params(model_path, cparams);
if (ctx == nullptr) {
fprintf(stderr, "error: Also failed to init model as transcriber\n");
return 1;
}
return 0;
}
int load_model_vad(const char *const model_path) {
whisper_log_set(ggml_log_cb, nullptr);
ggml_backend_load_all();
struct whisper_vad_context_params vcparams =
whisper_vad_default_context_params();
// XXX: Overridden to false in upstream due to performance?
// vcparams.use_gpu = true;
vctx = whisper_vad_init_from_file_with_params(model_path, vcparams);
if (vctx == nullptr) {
fprintf(stderr, "error: Failed to init model as VAD\n");
return 1;
}
return 0;
}
int vad(float pcmf32[], size_t pcmf32_len, float **segs_out,
size_t *segs_out_len) {
if (!whisper_vad_detect_speech(vctx, pcmf32, pcmf32_len)) {
fprintf(stderr, "error: failed to detect speech\n");
return 1;
}
struct whisper_vad_params params = whisper_vad_default_params();
struct whisper_vad_segments *segs =
whisper_vad_segments_from_probs(vctx, params);
size_t segn = whisper_vad_segments_n_segments(segs);
// fprintf(stderr, "Got segments %zd\n", segn);
flat_segs.clear();
for (int i = 0; i < segn; i++) {
flat_segs.push_back(whisper_vad_segments_get_segment_t0(segs, i));
flat_segs.push_back(whisper_vad_segments_get_segment_t1(segs, i));
}
// fprintf(stderr, "setting out variables: %p=%p -> %p, %p=%zx -> %zx\n",
// segs_out, *segs_out, flat_segs.data(), segs_out_len, *segs_out_len,
// flat_segs.size());
*segs_out = flat_segs.data();
*segs_out_len = flat_segs.size();
// fprintf(stderr, "freeing segs\n");
whisper_vad_free_segments(segs);
// fprintf(stderr, "returning\n");
return 0;
}
int transcribe(uint32_t threads, char *lang, bool translate, bool tdrz,
float pcmf32[], size_t pcmf32_len, size_t *segs_out_len) {
whisper_full_params wparams =
whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
wparams.n_threads = threads;
if (*lang != '\0')
wparams.language = lang;
else {
wparams.language = nullptr;
}
wparams.translate = translate;
wparams.debug_mode = true;
wparams.print_progress = true;
wparams.tdrz_enable = tdrz;
fprintf(stderr, "info: Enable tdrz: %d\n", tdrz);
if (whisper_full(ctx, wparams, pcmf32, pcmf32_len)) {
fprintf(stderr, "error: transcription failed\n");
return 1;
}
*segs_out_len = whisper_full_n_segments(ctx);
return 0;
}
const char *get_segment_text(int i) {
return whisper_full_get_segment_text(ctx, i);
}
int64_t get_segment_t0(int i) { return whisper_full_get_segment_t0(ctx, i); }
int64_t get_segment_t1(int i) { return whisper_full_get_segment_t1(ctx, i); }
int n_tokens(int i) { return whisper_full_n_tokens(ctx, i); }
int32_t get_token_id(int i, int j) {
return whisper_full_get_token_id(ctx, i, j);
}
bool get_segment_speaker_turn_next(int i) {
return whisper_full_get_segment_speaker_turn_next(ctx, i);
}

View File

@@ -1,161 +0,0 @@
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"unsafe"
"github.com/go-audio/wav"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)
var (
CppLoadModel func(modelPath string) int
CppLoadModelVAD func(modelPath string) int
CppVAD func(pcmf32 []float32, pcmf32Size uintptr, segsOut unsafe.Pointer, segsOutLen unsafe.Pointer) int
CppTranscribe func(threads uint32, lang string, translate bool, diarize bool, pcmf32 []float32, pcmf32Len uintptr, segsOutLen unsafe.Pointer) int
CppGetSegmentText func(i int) string
CppGetSegmentStart func(i int) int64
CppGetSegmentEnd func(i int) int64
CppNTokens func(i int) int
CppGetTokenID func(i int, j int) int
CppGetSegmentSpeakerTurnNext func(i int) bool
)
type Whisper struct {
base.SingleThread
}
func (w *Whisper) Load(opts *pb.ModelOptions) error {
vadOnly := false
for _, oo := range opts.Options {
if oo == "vad_only" {
vadOnly = true
} else {
fmt.Fprintf(os.Stderr, "Unrecognized option: %v\n", oo)
}
}
if vadOnly {
if ret := CppLoadModelVAD(opts.ModelFile); ret != 0 {
return fmt.Errorf("Failed to load Whisper VAD model")
}
return nil
}
if ret := CppLoadModel(opts.ModelFile); ret != 0 {
return fmt.Errorf("Failed to load Whisper transcription model")
}
return nil
}
func (w *Whisper) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
audio := req.Audio
// We expect 0xdeadbeef to be overwritten and if we see it in a stack trace we know it wasn't
segsPtr, segsLen := uintptr(0xdeadbeef), uintptr(0xdeadbeef)
segsPtrPtr, segsLenPtr := unsafe.Pointer(&segsPtr), unsafe.Pointer(&segsLen)
if ret := CppVAD(audio, uintptr(len(audio)), segsPtrPtr, segsLenPtr); ret != 0 {
return pb.VADResponse{}, fmt.Errorf("Failed VAD")
}
// Happens when CPP vector has not had any elements pushed to it
if segsPtr == 0 {
return pb.VADResponse{
Segments: []*pb.VADSegment{},
}, nil
}
// unsafeptr warning is caused by segsPtr being on the stack and therefor being subject to stack copying AFAICT
// however the stack shouldn't have grown between setting segsPtr and now, also the memory pointed to is allocated by C++
segs := unsafe.Slice((*float32)(unsafe.Pointer(segsPtr)), segsLen)
vadSegments := []*pb.VADSegment{}
for i := range len(segs) >> 1 {
s := segs[2*i] / 100
t := segs[2*i+1] / 100
vadSegments = append(vadSegments, &pb.VADSegment{
Start: s,
End: t,
})
}
return pb.VADResponse{
Segments: vadSegments,
}, nil
}
func (w *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return pb.TranscriptResult{}, err
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
return pb.TranscriptResult{}, err
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return pb.TranscriptResult{}, err
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return pb.TranscriptResult{}, err
}
data := buf.AsFloat32Buffer().Data
segsLen := uintptr(0xdeadbeef)
segsLenPtr := unsafe.Pointer(&segsLen)
if ret := CppTranscribe(opts.Threads, opts.Language, opts.Translate, opts.Diarize, data, uintptr(len(data)), segsLenPtr); ret != 0 {
return pb.TranscriptResult{}, fmt.Errorf("Failed Transcribe")
}
segments := []*pb.TranscriptSegment{}
text := ""
for i := range int(segsLen) {
s := CppGetSegmentStart(i)
t := CppGetSegmentEnd(i)
txt := strings.Clone(CppGetSegmentText(i))
tokens := make([]int32, CppNTokens(i))
if opts.Diarize && CppGetSegmentSpeakerTurnNext(i) {
txt += " [SPEAKER_TURN]"
}
for j := range tokens {
tokens[j] = int32(CppGetTokenID(i, j))
}
segment := &pb.TranscriptSegment{
Id: int32(i),
Text: txt,
Start: s, End: t,
Tokens: tokens,
}
segments = append(segments, segment)
text += " " + strings.TrimSpace(txt)
}
return pb.TranscriptResult{
Segments: segments,
Text: strings.TrimSpace(text),
}, nil
}

View File

@@ -1,17 +0,0 @@
#include <cstddef>
#include <cstdint>
extern "C" {
int load_model(const char *const model_path);
int load_model_vad(const char *const model_path);
int vad(float pcmf32[], size_t pcmf32_size, float **segs_out,
size_t *segs_out_len);
int transcribe(uint32_t threads, char *lang, bool translate, bool tdrz,
float pcmf32[], size_t pcmf32_len, size_t *segs_out_len);
const char *get_segment_text(int i);
int64_t get_segment_t0(int i);
int64_t get_segment_t1(int i);
int n_tokens(int i);
int32_t get_token_id(int i, int j);
bool get_segment_speaker_turn_next(int i);
}

View File

@@ -1,10 +1,10 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
@@ -12,34 +12,7 @@ var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
type LibFuncs struct {
FuncPtr any
Name string
}
func main() {
gosd, err := purego.Dlopen("./libgowhisper.so", purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
panic(err)
}
libFuncs := []LibFuncs{
{&CppLoadModel, "load_model"},
{&CppLoadModelVAD, "load_model_vad"},
{&CppVAD, "vad"},
{&CppTranscribe, "transcribe"},
{&CppGetSegmentText, "get_segment_text"},
{&CppGetSegmentStart, "get_segment_t0"},
{&CppGetSegmentEnd, "get_segment_t1"},
{&CppNTokens, "n_tokens"},
{&CppGetTokenID, "get_token_id"},
{&CppGetSegmentSpeakerTurnNext, "get_segment_speaker_turn_next"},
}
for _, lf := range libFuncs {
purego.RegisterLibFunc(lf.FuncPtr, gosd, lf.Name)
}
flag.Parse()
if err := grpc.StartServer(*addr, &Whisper{}); err != nil {

View File

@@ -10,8 +10,8 @@ CURDIR=$(dirname "$(realpath $0)")
# Create lib directory
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/whisper $CURDIR/libgowhisper.so $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/
cp -avrf $CURDIR/whisper $CURDIR/package/
cp -rfv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
@@ -42,13 +42,11 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
elif [ $(uname -s) = "Darwin" ]; then
echo "Detected Darwin"
else
echo "Error: Could not detect architecture"
exit 1
fi
echo "Packaging completed successfully"
echo "Packaging completed successfully"
ls -liah $CURDIR/package/
ls -liah $CURDIR/package/lib/
ls -liah $CURDIR/package/lib/

View File

@@ -0,0 +1,105 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"os"
"path/filepath"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)
type Whisper struct {
base.SingleThread
whisper whisper.Model
}
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
// Note: the Model here is a path to a directory containing the model files
w, err := whisper.New(opts.ModelFile)
sd.whisper = w
return err
}
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return pb.TranscriptResult{}, err
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
return pb.TranscriptResult{}, err
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return pb.TranscriptResult{}, err
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return pb.TranscriptResult{}, err
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := sd.whisper.NewContext()
if err != nil {
return pb.TranscriptResult{}, err
}
context.SetThreads(uint(opts.Threads))
if opts.Language != "" {
context.SetLanguage(opts.Language)
} else {
context.SetLanguage("auto")
}
if opts.Translate {
context.SetTranslate(true)
}
if err := context.Process(data, nil, nil, nil); err != nil {
return pb.TranscriptResult{}, err
}
segments := []*pb.TranscriptSegment{}
text := ""
for {
s, err := context.NextSegment()
if err != nil {
break
}
var tokens []int32
for _, t := range s.Tokens {
tokens = append(tokens, int32(t.Id))
}
segment := &pb.TranscriptSegment{Id: int32(s.Num), Text: s.Text, Start: int64(s.Start), End: int64(s.End), Tokens: tokens}
segments = append(segments, segment)
text += s.Text
}
return pb.TranscriptResult{
Segments: segments,
Text: text,
}, nil
}

View File

@@ -45,7 +45,6 @@
default: "cpu-whisper"
nvidia: "cuda12-whisper"
intel: "intel-sycl-f16-whisper"
metal: "metal-whisper"
amd: "rocm-whisper"
vulkan: "vulkan-whisper"
nvidia-l4t: "nvidia-l4t-arm64-whisper"
@@ -72,7 +71,7 @@
# amd: "rocm-stablediffusion-ggml"
vulkan: "vulkan-stablediffusion-ggml"
nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
metal: "metal-stablediffusion-ggml"
# metal: "metal-stablediffusion-ggml"
# darwin-x86: "darwin-x86-stablediffusion-ggml"
- &rfdetr
name: "rfdetr"
@@ -128,55 +127,6 @@
nvidia: "cuda12-vllm"
amd: "rocm-vllm"
intel: "intel-vllm"
- &mlx
name: "mlx"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx"
icon: https://avatars.githubusercontent.com/u/102832242?s=200&v=4
urls:
- https://github.com/ml-explore/mlx-lm
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-mlx
license: MIT
description: |
Run LLMs with MLX
tags:
- text-to-text
- LLM
- MLX
- &mlx-vlm
name: "mlx-vlm"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx-vlm"
icon: https://avatars.githubusercontent.com/u/102832242?s=200&v=4
urls:
- https://github.com/Blaizzy/mlx-vlm
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-mlx-vlm
license: MIT
description: |
Run Vision-Language Models with MLX
tags:
- text-to-text
- multimodal
- vision-language
- LLM
- MLX
- &mlx-audio
name: "mlx-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx-audio"
icon: https://avatars.githubusercontent.com/u/102832242?s=200&v=4
urls:
- https://github.com/Blaizzy/mlx-audio
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-mlx-audio
license: MIT
description: |
Run Audio Models with MLX
tags:
- audio-to-text
- audio-generation
- text-to-audio
- LLM
- MLX
- &rerankers
name: "rerankers"
alias: "rerankers"
@@ -219,8 +169,6 @@
intel: "intel-diffusers"
amd: "rocm-diffusers"
nvidia-l4t: "nvidia-l4t-diffusers"
metal: "metal-diffusers"
default: "cpu-diffusers"
- &exllama2
name: "exllama2"
urls:
@@ -350,8 +298,6 @@
alias: "chatterbox"
capabilities:
nvidia: "cuda12-chatterbox"
metal: "metal-chatterbox"
default: "cpu-chatterbox"
- &piper
name: "piper"
uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
@@ -425,21 +371,6 @@
- text-to-speech
- TTS
license: apache-2.0
- !!merge <<: *mlx
name: "mlx-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-mlx
- !!merge <<: *mlx-vlm
name: "mlx-vlm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx-vlm"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-mlx-vlm
- !!merge <<: *mlx-audio
name: "mlx-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx-audio"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-mlx-audio
- !!merge <<: *kitten-tts
name: "kitten-tts-development"
uri: "quay.io/go-skynet/local-ai-backends:master-kitten-tts"
@@ -582,16 +513,6 @@
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper"
mirrors:
- localai/localai-backends:latest-cpu-whisper
- !!merge <<: *whispercpp
name: "metal-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisper"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-whisper
- !!merge <<: *whispercpp
name: "metal-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisper"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-whisper
- !!merge <<: *whispercpp
name: "cpu-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisper"
@@ -678,16 +599,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-cpu-stablediffusion-ggml
- !!merge <<: *stablediffusionggml
name: "metal-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-stablediffusion-ggml"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-stablediffusion-ggml
- !!merge <<: *stablediffusionggml
name: "metal-stablediffusion-ggml-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-stablediffusion-ggml
- !!merge <<: *stablediffusionggml
name: "vulkan-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-stablediffusion-ggml"
@@ -944,18 +855,6 @@
intel: "intel-diffusers-development"
amd: "rocm-diffusers-development"
nvidia-l4t: "nvidia-l4t-diffusers-development"
metal: "metal-diffusers-development"
default: "cpu-diffusers-development"
- !!merge <<: *diffusers
name: "cpu-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-diffusers"
mirrors:
- localai/localai-backends:latest-cpu-diffusers
- !!merge <<: *diffusers
name: "cpu-diffusers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-diffusers"
mirrors:
- localai/localai-backends:master-cpu-diffusers
- !!merge <<: *diffusers
name: "nvidia-l4t-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-l4t-diffusers"
@@ -1006,16 +905,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-diffusers"
mirrors:
- localai/localai-backends:master-gpu-intel-diffusers
- !!merge <<: *diffusers
name: "metal-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-diffusers"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-diffusers
- !!merge <<: *diffusers
name: "metal-diffusers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-diffusers"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-diffusers
## exllama2
- !!merge <<: *exllama2
name: "exllama2-development"
@@ -1225,28 +1114,6 @@
name: "chatterbox-development"
capabilities:
nvidia: "cuda12-chatterbox-development"
metal: "metal-chatterbox-development"
default: "cpu-chatterbox-development"
- !!merge <<: *chatterbox
name: "cpu-chatterbox"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"
mirrors:
- localai/localai-backends:latest-cpu-chatterbox
- !!merge <<: *chatterbox
name: "cpu-chatterbox-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-chatterbox"
mirrors:
- localai/localai-backends:master-cpu-chatterbox
- !!merge <<: *chatterbox
name: "metal-chatterbox"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-chatterbox
- !!merge <<: *chatterbox
name: "metal-chatterbox-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-chatterbox"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-chatterbox
- !!merge <<: *chatterbox
name: "cuda12-chatterbox-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox"

View File

@@ -1,190 +1,38 @@
# Python Backends for LocalAI
# Common commands about conda environment
This directory contains Python-based AI backends for LocalAI, providing support for various AI models and hardware acceleration targets.
## Create a new empty conda environment
## Overview
```
conda create --name <env-name> python=<your version> -y
The Python backends use a unified build system based on `libbackend.sh` that provides:
- **Automatic virtual environment management** with support for both `uv` and `pip`
- **Hardware-specific dependency installation** (CPU, CUDA, Intel, MLX, etc.)
- **Portable Python support** for standalone deployments
- **Consistent backend execution** across different environments
## Available Backends
### Core AI Models
- **transformers** - Hugging Face Transformers framework (PyTorch-based)
- **vllm** - High-performance LLM inference engine
- **mlx** - Apple Silicon optimized ML framework
- **exllama2** - ExLlama2 quantized models
### Audio & Speech
- **bark** - Text-to-speech synthesis
- **coqui** - Coqui TTS models
- **faster-whisper** - Fast Whisper speech recognition
- **kitten-tts** - Lightweight TTS
- **mlx-audio** - Apple Silicon audio processing
- **chatterbox** - TTS model
- **kokoro** - TTS models
### Computer Vision
- **diffusers** - Stable Diffusion and image generation
- **mlx-vlm** - Vision-language models for Apple Silicon
- **rfdetr** - Object detection models
### Specialized
- **rerankers** - Text reranking models
## Quick Start
### Prerequisites
- Python 3.10+ (default: 3.10.18)
- `uv` package manager (recommended) or `pip`
- Appropriate hardware drivers for your target (CUDA, Intel, etc.)
### Installation
Each backend can be installed individually:
```bash
# Navigate to a specific backend
cd backend/python/transformers
# Install dependencies
make transformers
# or
bash install.sh
# Run the backend
make run
# or
bash run.sh
conda create --name autogptq python=3.11 -y
```
### Using the Unified Build System
## To activate the environment
The `libbackend.sh` script provides consistent commands across all backends:
```bash
# Source the library in your backend script
source $(dirname $0)/../common/libbackend.sh
# Install requirements (automatically handles hardware detection)
installRequirements
# Start the backend server
startBackend $@
# Run tests
runUnittests
As of conda 4.4
```
conda activate autogptq
```
## Hardware Targets
The conda version older than 4.4
The build system automatically detects and configures for different hardware:
- **CPU** - Standard CPU-only builds
- **CUDA** - NVIDIA GPU acceleration (supports CUDA 11/12)
- **Intel** - Intel XPU/GPU optimization
- **MLX** - Apple Silicon (M1/M2/M3) optimization
- **HIP** - AMD GPU acceleration
### Target-Specific Requirements
Backends can specify hardware-specific dependencies:
- `requirements.txt` - Base requirements
- `requirements-cpu.txt` - CPU-specific packages
- `requirements-cublas11.txt` - CUDA 11 packages
- `requirements-cublas12.txt` - CUDA 12 packages
- `requirements-intel.txt` - Intel-optimized packages
- `requirements-mps.txt` - Apple Silicon packages
## Configuration Options
### Environment Variables
- `PYTHON_VERSION` - Python version (default: 3.10)
- `PYTHON_PATCH` - Python patch version (default: 18)
- `BUILD_TYPE` - Force specific build target
- `USE_PIP` - Use pip instead of uv (default: false)
- `PORTABLE_PYTHON` - Enable portable Python builds
- `LIMIT_TARGETS` - Restrict backend to specific targets
### Example: CUDA 12 Only Backend
```bash
# In your backend script
LIMIT_TARGETS="cublas12"
source $(dirname $0)/../common/libbackend.sh
```
source activate autogptq
```
### Example: Intel-Optimized Backend
## Install the packages to your environment
```bash
# In your backend script
LIMIT_TARGETS="intel"
source $(dirname $0)/../common/libbackend.sh
Sometimes you need to install the packages from the conda-forge channel
By using `conda`
```
conda install <your-package-name>
conda install -c conda-forge <your package-name>
```
## Development
### Adding a New Backend
1. Create a new directory in `backend/python/`
2. Copy the template structure from `common/template/`
3. Implement your `backend.py` with the required gRPC interface
4. Add appropriate requirements files for your target hardware
5. Use `libbackend.sh` for consistent build and execution
### Testing
```bash
# Run backend tests
make test
# or
bash test.sh
Or by using `pip`
```
### Building
```bash
# Install dependencies
make <backend-name>
# Clean build artifacts
make clean
pip install <your-package-name>
```
## Architecture
Each backend follows a consistent structure:
```
backend-name/
├── backend.py # Main backend implementation
├── requirements.txt # Base dependencies
├── requirements-*.txt # Hardware-specific dependencies
├── install.sh # Installation script
├── run.sh # Execution script
├── test.sh # Test script
├── Makefile # Build targets
└── test.py # Unit tests
```
## Troubleshooting
### Common Issues
1. **Missing dependencies**: Ensure all requirements files are properly configured
2. **Hardware detection**: Check that `BUILD_TYPE` matches your system
3. **Python version**: Verify Python 3.10+ is available
4. **Virtual environment**: Use `ensureVenv` to create/activate environments
## Contributing
When adding new backends or modifying existing ones:
1. Follow the established directory structure
2. Use `libbackend.sh` for consistent behavior
3. Include appropriate requirements files for all target hardware
4. Add comprehensive tests
5. Update this README if adding new backend types

View File

@@ -1,23 +1,29 @@
.PHONY: ttsbark
ttsbark:
ttsbark: protogen
bash install.sh
.PHONY: run
run: ttsbark
run: protogen
@echo "Running bark..."
bash run.sh
@echo "bark run."
.PHONY: test
test: ttsbark
test: protogen
@echo "Testing bark..."
bash test.sh
@echo "bark tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -1,5 +1,5 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.8.10+xpu
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu

View File

@@ -1,4 +1,4 @@
bark==0.1.5
grpcio==1.74.0
grpcio==1.71.0
protobuf
certifi

View File

@@ -1,23 +1,29 @@
.PHONY: chatterbox
chatterbox:
.PHONY: coqui
coqui: protogen
bash install.sh
.PHONY: run
run: chatterbox
run: protogen
@echo "Running coqui..."
bash run.sh
@echo "coqui run."
.PHONY: test
test: chatterbox
test: protogen
@echo "Testing coqui..."
bash test.sh
@echo "coqui tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -41,9 +41,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
else:
print("CUDA is not available", file=sys.stderr)
device = "cpu"
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
if mps_available:
device = "mps"
if not torch.cuda.is_available() and request.CUDA:
return backend_pb2.Result(success=False, message="CUDA is not available")

View File

@@ -1,6 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cpu
accelerate
torch==2.6.0
torchaudio==2.6.0
transformers==4.46.3
chatterbox-tts==0.1.2
chatterbox-tts

View File

@@ -2,5 +2,5 @@
torch==2.6.0+cu118
torchaudio==2.6.0+cu118
transformers==4.46.3
chatterbox-tts==0.1.2
chatterbox-tts
accelerate

View File

@@ -1,5 +1,5 @@
torch==2.6.0
torchaudio==2.6.0
transformers==4.46.3
chatterbox-tts==0.1.2
chatterbox-tts
accelerate

View File

@@ -2,5 +2,5 @@
torch==2.6.0+rocm6.1
torchaudio==2.6.0+rocm6.1
transformers==4.46.3
chatterbox-tts==0.1.2
chatterbox-tts
accelerate

View File

@@ -3,8 +3,9 @@ intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
transformers==4.46.3
chatterbox-tts==0.1.2
chatterbox-tts
accelerate
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools
setuptools
accelerate

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
set -euo pipefail
# init handles the setup of the library
#
# use the library by adding the following line to a script:
# source $(dirname $0)/../common/libbackend.sh
@@ -17,182 +17,29 @@ set -euo pipefail
# LIMIT_TARGETS="cublas12"
# source $(dirname $0)/../common/libbackend.sh
#
# You can switch between uv (conda-like) and pip installation methods by setting USE_PIP:
# USE_PIP=true source $(dirname $0)/../common/libbackend.sh
#
# ===================== user-configurable defaults =====================
PYTHON_VERSION="${PYTHON_VERSION:-3.10}" # e.g. 3.10 / 3.11 / 3.12 / 3.13
PYTHON_PATCH="${PYTHON_PATCH:-18}" # e.g. 18 -> 3.10.18 ; 13 -> 3.11.13
PY_STANDALONE_TAG="${PY_STANDALONE_TAG:-20250818}" # release tag date
# Enable/disable bundling of a portable Python build
PORTABLE_PYTHON="${PORTABLE_PYTHON:-false}"
# If you want to fully pin the filename (including tuned CPU targets), set:
# PORTABLE_PY_FILENAME="cpython-3.10.18+20250818-x86_64_v3-unknown-linux-gnu-install_only.tar.gz"
: "${PORTABLE_PY_FILENAME:=}"
: "${PORTABLE_PY_SHA256:=}" # optional; if set we verify the download
# =====================================================================
PYTHON_VERSION="3.10"
# Default to uv if USE_PIP is not set
if [ "x${USE_PIP:-}" == "x" ]; then
USE_PIP=false
fi
# ----------------------- helpers -----------------------
function _is_musl() {
# detect musl (Alpine, etc)
if command -v ldd >/dev/null 2>&1; then
ldd --version 2>&1 | grep -qi musl && return 0
fi
# busybox-ish fallback
if command -v getconf >/dev/null 2>&1; then
getconf GNU_LIBC_VERSION >/dev/null 2>&1 || return 0
fi
return 1
}
function _triple() {
local os="" arch="" libc="gnu"
case "$(uname -s)" in
Linux*) os="unknown-linux" ;;
Darwin*) os="apple-darwin" ;;
MINGW*|MSYS*|CYGWIN*) os="pc-windows-msvc" ;; # best-effort for Git Bash
*) echo "Unsupported OS $(uname -s)"; exit 1;;
esac
case "$(uname -m)" in
x86_64) arch="x86_64" ;;
aarch64|arm64) arch="aarch64" ;;
armv7l) arch="armv7" ;;
i686|i386) arch="i686" ;;
ppc64le) arch="ppc64le" ;;
s390x) arch="s390x" ;;
riscv64) arch="riscv64" ;;
*) echo "Unsupported arch $(uname -m)"; exit 1;;
esac
if [[ "$os" == "unknown-linux" ]]; then
if _is_musl; then
libc="musl"
else
libc="gnu"
fi
echo "${arch}-${os}-${libc}"
else
echo "${arch}-${os}"
fi
}
function _portable_dir() {
echo "${EDIR}/python"
}
function _portable_bin() {
# python-build-standalone puts python in ./bin
echo "$(_portable_dir)/bin"
}
function _portable_python() {
if [ -x "$(_portable_bin)/python3" ]; then
echo "$(_portable_bin)/python3"
else
echo "$(_portable_bin)/python"
fi
}
# macOS loader env for the portable CPython
_macosPortableEnv() {
if [ "$(uname -s)" = "Darwin" ]; then
export DYLD_LIBRARY_PATH="$(_portable_dir)/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}}"
export DYLD_FALLBACK_LIBRARY_PATH="$(_portable_dir)/lib${DYLD_FALLBACK_LIBRARY_PATH:+:${DYLD_FALLBACK_LIBRARY_PATH}}"
fi
}
# Good hygiene on macOS for downloaded/extracted trees
_unquarantinePortablePython() {
if [ "$(uname -s)" = "Darwin" ]; then
command -v xattr >/dev/null 2>&1 && xattr -dr com.apple.quarantine "$(_portable_dir)" || true
fi
}
# ------------------ ### PORTABLE PYTHON ------------------
function ensurePortablePython() {
local pdir="$(_portable_dir)"
local pbin="$(_portable_bin)"
local pyexe
if [ -x "${pbin}/python3" ] || [ -x "${pbin}/python" ]; then
_macosPortableEnv
return 0
fi
mkdir -p "${pdir}"
local triple="$(_triple)"
local full_ver="${PYTHON_VERSION}.${PYTHON_PATCH}"
local fn=""
if [ -n "${PORTABLE_PY_FILENAME}" ]; then
fn="${PORTABLE_PY_FILENAME}"
else
# generic asset name: cpython-<full_ver>+<tag>-<triple>-install_only.tar.gz
fn="cpython-${full_ver}+${PY_STANDALONE_TAG}-${triple}-install_only.tar.gz"
fi
local url="https://github.com/astral-sh/python-build-standalone/releases/download/${PY_STANDALONE_TAG}/${fn}"
local tmp="${pdir}/${fn}"
echo "Downloading portable Python: ${fn}"
# curl with retries; fall back to wget if needed
if command -v curl >/dev/null 2>&1; then
curl -L --fail --retry 3 --retry-delay 1 -o "${tmp}" "${url}"
else
wget -O "${tmp}" "${url}"
fi
if [ -n "${PORTABLE_PY_SHA256}" ]; then
echo "${PORTABLE_PY_SHA256} ${tmp}" | sha256sum -c -
fi
echo "Extracting ${fn} -> ${pdir}"
# always a .tar.gz (we purposely choose install_only)
tar -xzf "${tmp}" -C "${pdir}"
rm -f "${tmp}"
# Some archives nest a directory; if so, flatten to ${pdir}
# Find the first dir with a 'bin/python*'
local inner
inner="$(find "${pdir}" -type f -path "*/bin/python*" -maxdepth 3 2>/dev/null | head -n1 || true)"
if [ -n "${inner}" ]; then
local inner_root
inner_root="$(dirname "$(dirname "${inner}")")" # .../bin -> root
if [ "${inner_root}" != "${pdir}" ]; then
# move contents up one level
shopt -s dotglob
mv "${inner_root}/"* "${pdir}/"
rm -rf "${inner_root}"
shopt -u dotglob
fi
fi
_unquarantinePortablePython
_macosPortableEnv
# Make sure it's runnable
pyexe="$(_portable_python)"
"${pyexe}" -V
}
# init handles the setup of the library
function init() {
# Name of the backend (directory name)
BACKEND_NAME=${PWD##*/}
MY_DIR=$(realpath "$(dirname "$0")")
# Path where all backends files are
MY_DIR=$(realpath `dirname $0`)
# Build type
BUILD_PROFILE=$(getBuildProfile)
# Environment directory
EDIR=${MY_DIR}
if [ "x${ENV_DIR:-}" != "x" ]; then
# Allow to specify a custom env dir for shared environments
if [ "x${ENV_DIR}" != "x" ]; then
EDIR=${ENV_DIR}
fi
if [ ! -z "${LIMIT_TARGETS:-}" ]; then
# If a backend has defined a list of valid build profiles...
if [ ! -z "${LIMIT_TARGETS}" ]; then
isValidTarget=$(checkTargets ${LIMIT_TARGETS})
if [ ${isValidTarget} != true ]; then
echo "${BACKEND_NAME} can only be used on the following targets: ${LIMIT_TARGETS}"
@@ -203,7 +50,6 @@ function init() {
echo "Initializing libbackend for ${BACKEND_NAME}"
}
# getBuildProfile will inspect the system to determine which build profile is appropriate:
# returns one of the following:
# - cublas11
@@ -211,140 +57,58 @@ function init() {
# - hipblas
# - intel
function getBuildProfile() {
if [ x"${BUILD_TYPE:-}" == "xcublas" ]; then
if [ ! -z "${CUDA_MAJOR_VERSION:-}" ]; then
if [ "x${BUILD_TYPE}" == "xl4t" ]; then
echo "l4t"
return 0
fi
# First check if we are a cublas build, and if so report the correct build profile
if [ x"${BUILD_TYPE}" == "xcublas" ]; then
if [ ! -z ${CUDA_MAJOR_VERSION} ]; then
# If we have been given a CUDA version, we trust it
echo ${BUILD_TYPE}${CUDA_MAJOR_VERSION}
else
# We don't know what version of cuda we are, so we report ourselves as a generic cublas
echo ${BUILD_TYPE}
fi
return 0
fi
# If /opt/intel exists, then we are doing an intel/ARC build
if [ -d "/opt/intel" ]; then
echo "intel"
return 0
fi
if [ -n "${BUILD_TYPE:-}" ]; then
# If for any other values of BUILD_TYPE, we don't need any special handling/discovery
if [ ! -z ${BUILD_TYPE} ]; then
echo ${BUILD_TYPE}
return 0
fi
# If there is no BUILD_TYPE set at all, set a build-profile value of CPU, we aren't building for any GPU targets
echo "cpu"
}
# Make the venv relocatable:
# - rewrite venv/bin/python{,3} to relative symlinks into $(_portable_dir)
# - normalize entrypoint shebangs to /usr/bin/env python3
_makeVenvPortable() {
local venv_dir="${EDIR}/venv"
local vbin="${venv_dir}/bin"
[ -d "${vbin}" ] || return 0
# 1) Replace python symlinks with relative ones to ../../python/bin/python3
# (venv/bin -> venv -> EDIR -> python/bin)
local rel_py='../../python/bin/python3'
for name in python3 python; do
if [ -e "${vbin}/${name}" ] || [ -L "${vbin}/${name}" ]; then
rm -f "${vbin}/${name}"
fi
done
ln -s "${rel_py}" "${vbin}/python3"
ln -s "python3" "${vbin}/python"
# 2) Rewrite shebangs of entry points to use env, so the venv is relocatable
# Only touch text files that start with #! and reference the current venv.
local ve_abs="${vbin}/python"
local sed_i=(sed -i)
# macOS/BSD sed needs a backup suffix; GNU sed doesn't. Make it portable:
if sed --version >/dev/null 2>&1; then
sed_i=(sed -i)
else
sed_i=(sed -i '')
fi
for f in "${vbin}"/*; do
[ -f "$f" ] || continue
# Fast path: check first two bytes (#!)
head -c2 "$f" 2>/dev/null | grep -q '^#!' || continue
# Only rewrite if the shebang mentions the (absolute) venv python
if head -n1 "$f" | grep -Fq "${ve_abs}"; then
"${sed_i[@]}" '1s|^#!.*$|#!/usr/bin/env python3|' "$f"
chmod +x "$f" 2>/dev/null || true
fi
done
}
# ensureVenv makes sure that the venv for the backend both exists, and is activated.
#
# This function is idempotent, so you can call it as many times as you want and it will
# always result in an activated virtual environment
function ensureVenv() {
local interpreter=""
if [ "x${PORTABLE_PYTHON}" == "xtrue" ] || [ -e "$(_portable_python)" ]; then
echo "Using portable Python"
ensurePortablePython
interpreter="$(_portable_python)"
else
# Prefer system python${PYTHON_VERSION}, else python3, else fall back to bundled
if command -v python${PYTHON_VERSION} >/dev/null 2>&1; then
interpreter="python${PYTHON_VERSION}"
elif command -v python3 >/dev/null 2>&1; then
interpreter="python3"
else
echo "No suitable system Python found, bootstrapping portable build..."
ensurePortablePython
interpreter="$(_portable_python)"
fi
fi
if [ ! -d "${EDIR}/venv" ]; then
if [ "x${USE_PIP}" == "xtrue" ]; then
"${interpreter}" -m venv --copies "${EDIR}/venv"
source "${EDIR}/venv/bin/activate"
"${interpreter}" -m pip install --upgrade pip
else
if [ "x${PORTABLE_PYTHON}" == "xtrue" ]; then
uv venv --python "${interpreter}" "${EDIR}/venv"
else
uv venv --python "${PYTHON_VERSION}" "${EDIR}/venv"
fi
fi
if [ "x${PORTABLE_PYTHON}" == "xtrue" ]; then
_makeVenvPortable
fi
uv venv --python ${PYTHON_VERSION} ${EDIR}/venv
echo "virtualenv created"
fi
# We call it here to make sure that when we source a venv we can still use python as expected
if [ -x "$(_portable_python)" ]; then
_macosPortableEnv
# Source if we are not already in a Virtual env
if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then
source ${EDIR}/venv/bin/activate
echo "virtualenv activated"
fi
if [ "x${VIRTUAL_ENV:-}" != "x${EDIR}/venv" ]; then
source "${EDIR}/venv/bin/activate"
fi
echo "activated virtualenv has been ensured"
}
function runProtogen() {
ensureVenv
if [ "x${USE_PIP}" == "xtrue" ]; then
pip install grpcio-tools
else
uv pip install grpcio-tools
fi
pushd "${EDIR}" >/dev/null
# use the venv python (ensures correct interpreter & sys.path)
python -m grpc_tools.protoc -I../../ -I./ --python_out=. --grpc_python_out=. backend.proto
popd >/dev/null
}
# installRequirements looks for several requirements files and if they exist runs the install for them in order
#
# - requirements-install.txt
@@ -352,7 +116,7 @@ function runProtogen() {
# - requirements-${BUILD_TYPE}.txt
# - requirements-${BUILD_PROFILE}.txt
#
# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda-11 or cuda-12
# BUILD_PROFILE is a pore specific version of BUILD_TYPE, ex: cuda-11 or cuda-12
# it can also include some options that we do not have BUILD_TYPES for, ex: intel
#
# NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index.
@@ -368,41 +132,36 @@ function runProtogen() {
# installRequirements
function installRequirements() {
ensureVenv
# These are the requirements files we will attempt to install, in order
declare -a requirementFiles=(
"${EDIR}/requirements-install.txt"
"${EDIR}/requirements.txt"
"${EDIR}/requirements-${BUILD_TYPE:-}.txt"
"${EDIR}/requirements-${BUILD_TYPE}.txt"
)
if [ "x${BUILD_TYPE:-}" != "x${BUILD_PROFILE}" ]; then
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
fi
if [ "x${BUILD_TYPE:-}" == "x" ]; then
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
if [ "x${BUILD_TYPE}" == "x" ]; then
requirementFiles+=("${EDIR}/requirements-cpu.txt")
fi
requirementFiles+=("${EDIR}/requirements-after.txt")
if [ "x${BUILD_TYPE:-}" != "x${BUILD_PROFILE}" ]; then
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
fi
# This is needed to build wheels that e.g. depends on Python.h
if [ "x${PORTABLE_PYTHON}" == "xtrue" ]; then
export C_INCLUDE_PATH="${C_INCLUDE_PATH:-}:$(_portable_dir)/include/python${PYTHON_VERSION}"
fi
for reqFile in ${requirementFiles[@]}; do
if [ -f "${reqFile}" ]; then
if [ -f ${reqFile} ]; then
echo "starting requirements install for ${reqFile}"
if [ "x${USE_PIP}" == "xtrue" ]; then
pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --requirement "${reqFile}"
else
uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --requirement "${reqFile}"
fi
uv pip install ${EXTRA_PIP_INSTALL_FLAGS} --requirement ${reqFile}
echo "finished requirements install for ${reqFile}"
fi
done
runProtogen
}
# startBackend discovers and runs the backend GRPC server
@@ -420,18 +179,18 @@ function installRequirements() {
# - ${BACKEND_NAME}.py
function startBackend() {
ensureVenv
if [ ! -z "${BACKEND_FILE:-}" ]; then
exec "${EDIR}/venv/bin/python" "${BACKEND_FILE}" "$@"
if [ ! -z ${BACKEND_FILE} ]; then
exec ${EDIR}/venv/bin/python ${BACKEND_FILE} $@
elif [ -e "${MY_DIR}/server.py" ]; then
exec "${EDIR}/venv/bin/python" "${MY_DIR}/server.py" "$@"
exec ${EDIR}/venv/bin/python ${MY_DIR}/server.py $@
elif [ -e "${MY_DIR}/backend.py" ]; then
exec "${EDIR}/venv/bin/python" "${MY_DIR}/backend.py" "$@"
exec ${EDIR}/venv/bin/python ${MY_DIR}/backend.py $@
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
exec "${EDIR}/venv/bin/python" "${MY_DIR}/${BACKEND_NAME}.py" "$@"
exec ${EDIR}/venv/bin/python ${MY_DIR}/${BACKEND_NAME}.py $@
fi
}
# runUnittests discovers and runs python unittests
#
# You can specify a specific test file to use by setting TEST_FILE before calling runUnittests.
@@ -444,36 +203,41 @@ function startBackend() {
# be default a file named test.py in the backends directory will be used
function runUnittests() {
ensureVenv
if [ ! -z "${TEST_FILE:-}" ]; then
testDir=$(dirname "$(realpath "${TEST_FILE}")")
testFile=$(basename "${TEST_FILE}")
pushd "${testDir}" >/dev/null
python -m unittest "${testFile}"
popd >/dev/null
if [ ! -z ${TEST_FILE} ]; then
testDir=$(dirname `realpath ${TEST_FILE}`)
testFile=$(basename ${TEST_FILE})
pushd ${testDir}
python -m unittest ${testFile}
popd
elif [ -f "${MY_DIR}/test.py" ]; then
pushd "${MY_DIR}" >/dev/null
pushd ${MY_DIR}
python -m unittest test.py
popd >/dev/null
popd
else
echo "no tests defined for ${BACKEND_NAME}"
fi
}
##################################################################################
# Below here are helper functions not intended to be used outside of the library #
##################################################################################
# checkTargets determines if the current BUILD_TYPE or BUILD_PROFILE is in a list of valid targets
function checkTargets() {
# Collect all provided targets into a variable and...
targets=$@
# ...convert it into an array
declare -a targets=($targets)
for target in ${targets[@]}; do
if [ "x${BUILD_TYPE:-}" == "x${target}" ]; then
echo true; return 0
if [ "x${BUILD_TYPE}" == "x${target}" ]; then
echo true
return 0
fi
if [ "x${BUILD_PROFILE}" == "x${target}" ]; then
echo true; return 0
echo true
return 0
fi
done
echo false

View File

@@ -3,11 +3,18 @@
.PHONY: install
install:
bash install.sh
$(MAKE) protogen
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -8,4 +8,6 @@ else
source $backend_dir/../common/libbackend.sh
fi
runProtogen
ensureVenv
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

View File

@@ -1,5 +1,5 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.8.10+xpu
torch==2.8.0
oneccl_bind_pt==2.8.0+xpu
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]

View File

@@ -1,3 +1,3 @@
grpcio==1.74.0
grpcio==1.71.0
protobuf
grpcio-tools

View File

@@ -1,23 +1,29 @@
.PHONY: coqui
coqui:
coqui: protogen
bash install.sh
.PHONY: run
run: coqui
run: protogen
@echo "Running coqui..."
bash run.sh
@echo "coqui run."
.PHONY: test
test: coqui
test: protogen
@echo "Testing coqui..."
bash test.sh
@echo "coqui tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -40,9 +40,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
else:
print("CUDA is not available", file=sys.stderr)
device = "cpu"
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
if mps_available:
device = "mps"
if not torch.cuda.is_available() and request.CUDA:
return backend_pb2.Result(success=False, message="CUDA is not available")

View File

@@ -1,4 +1,4 @@
grpcio==1.74.0
grpcio==1.71.0
protobuf
certifi
packaging==24.1

View File

@@ -12,22 +12,28 @@ export SKIP_CONDA=1
endif
.PHONY: diffusers
diffusers:
diffusers: protogen
bash install.sh
.PHONY: run
run: diffusers
run: protogen
@echo "Running diffusers..."
bash run.sh
@echo "Diffusers run."
test: diffusers
test: protogen
bash test.sh
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -18,7 +18,7 @@ import backend_pb2_grpc
import grpc
from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel, QwenImageEditPipeline, AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline, Lumina2Text2ImgPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
@@ -72,6 +72,13 @@ def is_float(s):
except ValueError:
return False
def is_int(s):
try:
int(s)
return True
except ValueError:
return False
# The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
# Credits to https://github.com/neggles
# See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
@@ -177,10 +184,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
key, value = opt.split(":")
# if value is a number, convert it to the appropriate type
if is_float(value):
if value.is_integer():
value = int(value)
else:
value = float(value)
value = float(value)
elif is_int(value):
value = int(value)
self.options[key] = value
# From options, extract if present "torch_dtype" and set it to the appropriate type
@@ -257,9 +263,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
elif request.PipelineType == "DiffusionPipeline":
self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
elif request.PipelineType == "QwenImageEditPipeline":
self.pipe = QwenImageEditPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
elif request.PipelineType == "VideoDiffusionPipeline":
self.txt2vid = True
self.pipe = DiffusionPipeline.from_pretrained(request.Model,
@@ -328,32 +331,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
torch_dtype=torch.bfloat16)
self.pipe.vae.to(torch.bfloat16)
self.pipe.text_encoder.to(torch.bfloat16)
elif request.PipelineType == "WanPipeline":
# WAN2.2 pipeline requires special VAE handling
vae = AutoencoderKLWan.from_pretrained(
request.Model,
subfolder="vae",
torch_dtype=torch.float32
)
self.pipe = WanPipeline.from_pretrained(
request.Model,
vae=vae,
torch_dtype=torchType
)
self.txt2vid = True # WAN2.2 is a text-to-video pipeline
elif request.PipelineType == "WanImageToVideoPipeline":
# WAN2.2 image-to-video pipeline
vae = AutoencoderKLWan.from_pretrained(
request.Model,
subfolder="vae",
torch_dtype=torch.float32
)
self.pipe = WanImageToVideoPipeline.from_pretrained(
request.Model,
vae=vae,
torch_dtype=torchType
)
self.img2vid = True # WAN2.2 image-to-video pipeline
if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip
@@ -388,9 +365,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
device = "cpu" if not request.CUDA else "cuda"
if XPU:
device = "xpu"
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
if mps_available:
device = "mps"
self.device = device
if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file )
@@ -495,24 +469,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
"num_inference_steps": steps,
}
# Handle image source: prioritize RefImages over request.src
image_src = None
if hasattr(request, 'ref_images') and request.ref_images and len(request.ref_images) > 0:
# Use the first reference image if available
image_src = request.ref_images[0]
print(f"Using reference image: {image_src}", file=sys.stderr)
elif request.src != "":
# Fall back to request.src if no ref_images
image_src = request.src
print(f"Using source image: {image_src}", file=sys.stderr)
else:
print("No image source provided", file=sys.stderr)
if image_src and not self.controlnet and not self.img2vid:
image = Image.open(image_src)
if request.src != "" and not self.controlnet and not self.img2vid:
image = Image.open(request.src)
options["image"] = image
elif self.controlnet and image_src:
pose_image = load_image(image_src)
elif self.controlnet and request.src:
pose_image = load_image(request.src)
options["image"] = pose_image
if CLIPSKIP and self.clip_skip != 0:
@@ -554,11 +515,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if self.img2vid:
# Load the conditioning image
if image_src:
image = load_image(image_src)
else:
# Fallback to request.src for img2vid if no ref_images
image = load_image(request.src)
image = load_image(request.src)
image = image.resize((1024, 576))
generator = torch.manual_seed(request.seed)
@@ -595,96 +552,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Result(message="Media generated", success=True)
def GenerateVideo(self, request, context):
try:
prompt = request.prompt
if not prompt:
return backend_pb2.Result(success=False, message="No prompt provided for video generation")
# Set default values from request or use defaults
num_frames = request.num_frames if request.num_frames > 0 else 81
fps = request.fps if request.fps > 0 else 16
cfg_scale = request.cfg_scale if request.cfg_scale > 0 else 4.0
num_inference_steps = request.step if request.step > 0 else 40
# Prepare generation parameters
kwargs = {
"prompt": prompt,
"negative_prompt": request.negative_prompt if request.negative_prompt else "",
"height": request.height if request.height > 0 else 720,
"width": request.width if request.width > 0 else 1280,
"num_frames": num_frames,
"guidance_scale": cfg_scale,
"num_inference_steps": num_inference_steps,
}
# Add custom options from self.options (including guidance_scale_2 if specified)
kwargs.update(self.options)
# Set seed if provided
if request.seed > 0:
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(request.seed)
# Handle start and end images for video generation
if request.start_image:
kwargs["start_image"] = load_image(request.start_image)
if request.end_image:
kwargs["end_image"] = load_image(request.end_image)
print(f"Generating video with {kwargs=}", file=sys.stderr)
# Generate video frames based on pipeline type
if self.PipelineType == "WanPipeline":
# WAN2.2 text-to-video generation
output = self.pipe(**kwargs)
frames = output.frames[0] # WAN2.2 returns frames in this format
elif self.PipelineType == "WanImageToVideoPipeline":
# WAN2.2 image-to-video generation
if request.start_image:
# Load and resize the input image according to WAN2.2 requirements
image = load_image(request.start_image)
# Use request dimensions or defaults, but respect WAN2.2 constraints
request_height = request.height if request.height > 0 else 480
request_width = request.width if request.width > 0 else 832
max_area = request_height * request_width
aspect_ratio = image.height / image.width
mod_value = self.pipe.vae_scale_factor_spatial * self.pipe.transformer.config.patch_size[1]
height = round((max_area * aspect_ratio) ** 0.5 / mod_value) * mod_value
width = round((max_area / aspect_ratio) ** 0.5 / mod_value) * mod_value
image = image.resize((width, height))
kwargs["image"] = image
kwargs["height"] = height
kwargs["width"] = width
output = self.pipe(**kwargs)
frames = output.frames[0]
elif self.img2vid:
# Generic image-to-video generation
if request.start_image:
image = load_image(request.start_image)
image = image.resize((request.width if request.width > 0 else 1024,
request.height if request.height > 0 else 576))
kwargs["image"] = image
output = self.pipe(**kwargs)
frames = output.frames[0]
elif self.txt2vid:
# Generic text-to-video generation
output = self.pipe(**kwargs)
frames = output.frames[0]
else:
return backend_pb2.Result(success=False, message=f"Pipeline {self.PipelineType} does not support video generation")
# Export video
export_to_video(frames, request.dst, fps=fps)
return backend_pb2.Result(message="Video generated successfully", success=True)
except Exception as err:
print(f"Error generating video: {err}", file=sys.stderr)
traceback.print_exc()
return backend_pb2.Result(success=False, message=f"Error generating video: {err}")
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),

View File

@@ -1,12 +1,9 @@
--extra-index-url https://download.pytorch.org/whl/cpu
git+https://github.com/huggingface/diffusers
opencv-python
transformers
torchvision==0.22.1
accelerate
compel
peft
sentencepiece
torch==2.7.1
optimum-quanto
ftfy
optimum-quanto

View File

@@ -1,12 +1,10 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.7.1+cu118
git+https://github.com/huggingface/diffusers
opencv-python
transformers
torchvision==0.22.1
accelerate
compel
peft
sentencepiece
torch==2.7.1
optimum-quanto
ftfy
optimum-quanto

View File

@@ -1,12 +1,9 @@
--extra-index-url https://download.pytorch.org/whl/cu121
torch==2.7.1
git+https://github.com/huggingface/diffusers
opencv-python
transformers
torchvision
accelerate
compel
peft
sentencepiece
torch
ftfy
optimum-quanto
optimum-quanto

View File

@@ -8,5 +8,4 @@ accelerate
compel
peft
sentencepiece
optimum-quanto
ftfy
optimum-quanto

View File

@@ -12,5 +12,4 @@ accelerate
compel
peft
sentencepiece
optimum-quanto
ftfy
optimum-quanto

View File

@@ -6,7 +6,4 @@ accelerate
compel
peft
optimum-quanto
numpy<2
sentencepiece
torchvision
ftfy
numpy<2

View File

@@ -1,11 +0,0 @@
torch==2.7.1
torchvision==0.22.1
git+https://github.com/huggingface/diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto
ftfy

View File

@@ -1,5 +1,5 @@
setuptools
grpcio==1.74.0
grpcio==1.71.0
pillow
protobuf
certifi

View File

@@ -12,6 +12,4 @@ if [ -d "/opt/intel" ]; then
export XPU=1
fi
export PYTORCH_ENABLE_MPS_FALLBACK=1
startBackend $@

View File

@@ -1,17 +1,23 @@
.PHONY: exllama2
exllama2:
exllama2: protogen
bash install.sh
.PHONY: run
run: exllama2
run: protogen
@echo "Running exllama2..."
bash run.sh
@echo "exllama2 run."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
$(RM) -r venv source __pycache__

View File

@@ -1,4 +1,4 @@
grpcio==1.74.0
grpcio==1.71.0
protobuf
certifi
wheel

View File

@@ -3,11 +3,18 @@
.PHONY: install
install:
bash install.sh
$(MAKE) protogen
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -10,7 +10,7 @@ import sys
import os
import backend_pb2
import backend_pb2_grpc
import torch
from faster_whisper import WhisperModel
import grpc
@@ -35,9 +35,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# device = "cuda" if request.CUDA else "cpu"
if request.CUDA:
device = "cuda"
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
if mps_available:
device = "mps"
try:
print("Preparing models, please wait", file=sys.stderr)
self.model = WhisperModel(request.Model, device=device, compute_type="float16")

View File

@@ -1,23 +1,29 @@
.PHONY: kitten-tts
kitten-tts:
kitten-tts: protogen
bash install.sh
.PHONY: run
run: kitten-tts
run: protogen
@echo "Running kitten-tts..."
bash run.sh
@echo "kitten-tts run."
.PHONY: test
test: kitten-tts
test: protogen
@echo "Testing kitten-tts..."
bash test.sh
@echo "kitten-tts tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -33,6 +33,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
# Get device
# device = "cuda" if request.CUDA else "cpu"
if torch.cuda.is_available():
print("CUDA is available", file=sys.stderr)
device = "cuda"
else:
print("CUDA is not available", file=sys.stderr)
device = "cpu"
if not torch.cuda.is_available() and request.CUDA:
return backend_pb2.Result(success=False, message="CUDA is not available")
self.AudioPath = None
# List available KittenTTS models
print("Available KittenTTS voices: expr-voice-2-m, expr-voice-2-f, expr-voice-3-m, expr-voice-3-f, expr-voice-4-m, expr-voice-4-f, expr-voice-5-m, expr-voice-5-f")

View File

@@ -1,23 +1,29 @@
.PHONY: kokoro
kokoro:
kokoro: protogen
bash install.sh
.PHONY: run
run: kokoro
run: protogen
@echo "Running kokoro..."
bash run.sh
@echo "kokoro run."
.PHONY: test
test: kokoro
test: protogen
@echo "Testing kokoro..."
bash test.sh
@echo "kokoro tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -33,6 +33,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
# Get device
if torch.cuda.is_available():
print("CUDA is available", file=sys.stderr)
device = "cuda"
else:
print("CUDA is not available", file=sys.stderr)
device = "cpu"
if not torch.cuda.is_available() and request.CUDA:
return backend_pb2.Result(success=False, message="CUDA is not available")
try:
print("Preparing Kokoro TTS pipeline, please wait", file=sys.stderr)
# empty dict

View File

@@ -1,23 +0,0 @@
.PHONY: mlx-audio
mlx-audio:
bash install.sh
.PHONY: run
run: mlx-audio
@echo "Running mlx-audio..."
bash run.sh
@echo "mlx run."
.PHONY: test
test: mlx-audio
@echo "Testing mlx-audio..."
bash test.sh
@echo "mlx tested."
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -1,459 +0,0 @@
#!/usr/bin/env python3
import asyncio
from concurrent import futures
import argparse
import signal
import sys
import os
import shutil
import glob
from typing import List
import time
import tempfile
import backend_pb2
import backend_pb2_grpc
import grpc
from mlx_audio.tts.utils import load_model
import soundfile as sf
import numpy as np
import uuid
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
A gRPC servicer that implements the Backend service defined in backend.proto.
This backend provides TTS (Text-to-Speech) functionality using MLX-Audio.
"""
def _is_float(self, s):
"""Check if a string can be converted to float."""
try:
float(s)
return True
except ValueError:
return False
def Health(self, request, context):
"""
Returns a health check message.
Args:
request: The health check request.
context: The gRPC context.
Returns:
backend_pb2.Reply: The health check reply.
"""
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
async def LoadModel(self, request, context):
"""
Loads a TTS model using MLX-Audio.
Args:
request: The load model request.
context: The gRPC context.
Returns:
backend_pb2.Result: The load model result.
"""
try:
print(f"Loading MLX-Audio TTS model: {request.Model}", file=sys.stderr)
print(f"Request: {request}", file=sys.stderr)
# Parse options like in the kokoro backend
options = request.Options
self.options = {}
# The options are a list of strings in this form optname:optvalue
# We store all the options in a dict for later use
for opt in options:
if ":" not in opt:
continue
key, value = opt.split(":", 1) # Split only on first colon to handle values with colons
# Convert numeric values to appropriate types
if self._is_float(value):
if float(value).is_integer():
value = int(value)
else:
value = float(value)
elif value.lower() in ["true", "false"]:
value = value.lower() == "true"
self.options[key] = value
print(f"Options: {self.options}", file=sys.stderr)
# Load the model using MLX-Audio's load_model function
try:
self.tts_model = load_model(request.Model)
self.model_path = request.Model
print(f"TTS model loaded successfully from {request.Model}", file=sys.stderr)
except Exception as model_err:
print(f"Error loading TTS model: {model_err}", file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Failed to load model: {model_err}")
except Exception as err:
print(f"Error loading MLX-Audio TTS model {err=}, {type(err)=}", file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Error loading MLX-Audio TTS model: {err}")
print("MLX-Audio TTS model loaded successfully", file=sys.stderr)
return backend_pb2.Result(message="MLX-Audio TTS model loaded successfully", success=True)
def TTS(self, request, context):
"""
Generates TTS audio from text using MLX-Audio.
Args:
request: A TTSRequest object containing text, model, destination, voice, and language.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A Result object indicating success or failure.
"""
try:
# Check if model is loaded
if not hasattr(self, 'tts_model') or self.tts_model is None:
return backend_pb2.Result(success=False, message="TTS model not loaded. Please call LoadModel first.")
print(f"Generating TTS with MLX-Audio - text: {request.text[:50]}..., voice: {request.voice}, language: {request.language}", file=sys.stderr)
# Handle speed parameter based on model type
speed_value = self._handle_speed_parameter(request, self.model_path)
# Map language names to codes if needed
lang_code = self._map_language_code(request.language, request.voice)
# Prepare generation parameters
gen_params = {
"text": request.text,
"speed": speed_value,
"verbose": False,
}
# Add model-specific parameters
if request.voice and request.voice.strip():
gen_params["voice"] = request.voice
# Check if model supports language codes (primarily Kokoro)
if "kokoro" in self.model_path.lower():
gen_params["lang_code"] = lang_code
# Add pitch and gender for Spark models
if "spark" in self.model_path.lower():
gen_params["pitch"] = 1.0 # Default to moderate
gen_params["gender"] = "female" # Default to female
print(f"Generation parameters: {gen_params}", file=sys.stderr)
# Generate audio using the loaded model
try:
results = self.tts_model.generate(**gen_params)
except Exception as gen_err:
print(f"Error during TTS generation: {gen_err}", file=sys.stderr)
return backend_pb2.Result(success=False, message=f"TTS generation failed: {gen_err}")
# Process the generated audio segments
audio_arrays = []
for segment in results:
audio_arrays.append(segment.audio)
# If no segments, return error
if not audio_arrays:
print("No audio segments generated", file=sys.stderr)
return backend_pb2.Result(success=False, message="No audio generated")
# Concatenate all segments
cat_audio = np.concatenate(audio_arrays, axis=0)
# Generate output filename and path
if request.dst:
output_path = request.dst
else:
unique_id = str(uuid.uuid4())
filename = f"tts_{unique_id}.wav"
output_path = filename
# Write the audio as a WAV
try:
sf.write(output_path, cat_audio, 24000)
print(f"Successfully wrote audio file to {output_path}", file=sys.stderr)
# Verify the file exists and has content
if not os.path.exists(output_path):
print(f"File was not created at {output_path}", file=sys.stderr)
return backend_pb2.Result(success=False, message="Failed to create audio file")
file_size = os.path.getsize(output_path)
if file_size == 0:
print("File was created but is empty", file=sys.stderr)
return backend_pb2.Result(success=False, message="Generated audio file is empty")
print(f"Audio file size: {file_size} bytes", file=sys.stderr)
except Exception as write_err:
print(f"Error writing audio file: {write_err}", file=sys.stderr)
return backend_pb2.Result(success=False, message=f"Failed to save audio: {write_err}")
return backend_pb2.Result(success=True, message=f"TTS audio generated successfully: {output_path}")
except Exception as e:
print(f"Error in MLX-Audio TTS: {e}", file=sys.stderr)
return backend_pb2.Result(success=False, message=f"TTS generation failed: {str(e)}")
async def Predict(self, request, context):
"""
Generates TTS audio based on the given prompt using MLX-Audio TTS.
This is a fallback method for compatibility with the Predict endpoint.
Args:
request: The predict request.
context: The gRPC context.
Returns:
backend_pb2.Reply: The predict result.
"""
try:
# Check if model is loaded
if not hasattr(self, 'tts_model') or self.tts_model is None:
context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
context.set_details("TTS model not loaded. Please call LoadModel first.")
return backend_pb2.Reply(message=bytes("", encoding='utf-8'))
# For TTS, we expect the prompt to contain the text to synthesize
if not request.Prompt:
context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
context.set_details("Prompt is required for TTS generation")
return backend_pb2.Reply(message=bytes("", encoding='utf-8'))
# Handle speed parameter based on model type
speed_value = self._handle_speed_parameter(request, self.model_path)
# Map language names to codes if needed
lang_code = self._map_language_code(None, None) # Use defaults for Predict
# Prepare generation parameters
gen_params = {
"text": request.Prompt,
"speed": speed_value,
"verbose": False,
}
# Add model-specific parameters
if hasattr(self, 'options') and 'voice' in self.options:
gen_params["voice"] = self.options['voice']
# Check if model supports language codes (primarily Kokoro)
if "kokoro" in self.model_path.lower():
gen_params["lang_code"] = lang_code
print(f"Generating TTS with MLX-Audio - text: {request.Prompt[:50]}..., params: {gen_params}", file=sys.stderr)
# Generate audio using the loaded model
try:
results = self.tts_model.generate(**gen_params)
except Exception as gen_err:
print(f"Error during TTS generation: {gen_err}", file=sys.stderr)
context.set_code(grpc.StatusCode.INTERNAL)
context.set_details(f"TTS generation failed: {gen_err}")
return backend_pb2.Reply(message=bytes("", encoding='utf-8'))
# Process the generated audio segments
audio_arrays = []
for segment in results:
audio_arrays.append(segment.audio)
# If no segments, return error
if not audio_arrays:
print("No audio segments generated", file=sys.stderr)
return backend_pb2.Reply(message=bytes("No audio generated", encoding='utf-8'))
# Concatenate all segments
cat_audio = np.concatenate(audio_arrays, axis=0)
duration = len(cat_audio) / 24000 # Assuming 24kHz sample rate
# Return success message with audio information
response = f"TTS audio generated successfully. Duration: {duration:.2f}s, Sample rate: 24000Hz"
return backend_pb2.Reply(message=bytes(response, encoding='utf-8'))
except Exception as e:
print(f"Error in MLX-Audio TTS Predict: {e}", file=sys.stderr)
context.set_code(grpc.StatusCode.INTERNAL)
context.set_details(f"TTS generation failed: {str(e)}")
return backend_pb2.Reply(message=bytes("", encoding='utf-8'))
def _handle_speed_parameter(self, request, model_path):
"""
Handle speed parameter based on model type.
Args:
request: The TTSRequest object.
model_path: The model path to determine model type.
Returns:
float: The processed speed value.
"""
# Get speed from options if available
speed = 1.0
if hasattr(self, 'options') and 'speed' in self.options:
speed = self.options['speed']
# Handle speed parameter based on model type
if "spark" in model_path.lower():
# Spark actually expects float values that map to speed descriptions
speed_map = {
"very_low": 0.0,
"low": 0.5,
"moderate": 1.0,
"high": 1.5,
"very_high": 2.0,
}
if isinstance(speed, str) and speed in speed_map:
speed_value = speed_map[speed]
else:
# Try to use as float, default to 1.0 (moderate) if invalid
try:
speed_value = float(speed)
if speed_value not in [0.0, 0.5, 1.0, 1.5, 2.0]:
speed_value = 1.0 # Default to moderate
except:
speed_value = 1.0 # Default to moderate
else:
# Other models use float speed values
try:
speed_value = float(speed)
if speed_value < 0.5 or speed_value > 2.0:
speed_value = 1.0 # Default to 1.0 if out of range
except ValueError:
speed_value = 1.0 # Default to 1.0 if invalid
return speed_value
def _map_language_code(self, language, voice):
"""
Map language names to codes if needed.
Args:
language: The language parameter from the request.
voice: The voice parameter from the request.
Returns:
str: The language code.
"""
if not language:
# Default to voice[0] if not found
return voice[0] if voice else "a"
# Map language names to codes if needed
language_map = {
"american_english": "a",
"british_english": "b",
"spanish": "e",
"french": "f",
"hindi": "h",
"italian": "i",
"portuguese": "p",
"japanese": "j",
"mandarin_chinese": "z",
# Also accept direct language codes
"a": "a", "b": "b", "e": "e", "f": "f", "h": "h", "i": "i", "p": "p", "j": "j", "z": "z",
}
return language_map.get(language.lower(), language)
def _build_generation_params(self, request, default_speed=1.0):
"""
Build generation parameters from request attributes and options for MLX-Audio TTS.
Args:
request: The gRPC request.
default_speed: Default speed if not specified.
Returns:
dict: Generation parameters for MLX-Audio
"""
# Initialize generation parameters for MLX-Audio TTS
generation_params = {
'speed': default_speed,
'voice': 'af_heart', # Default voice
'lang_code': 'a', # Default language code
}
# Extract parameters from request attributes
if hasattr(request, 'Temperature') and request.Temperature > 0:
# Temperature could be mapped to speed variation
generation_params['speed'] = 1.0 + (request.Temperature - 0.5) * 0.5
# Override with options if available
if hasattr(self, 'options'):
# Speed from options
if 'speed' in self.options:
generation_params['speed'] = self.options['speed']
# Voice from options
if 'voice' in self.options:
generation_params['voice'] = self.options['voice']
# Language code from options
if 'lang_code' in self.options:
generation_params['lang_code'] = self.options['lang_code']
# Model-specific parameters
param_option_mapping = {
'temp': 'speed',
'temperature': 'speed',
'top_p': 'speed', # Map top_p to speed variation
}
for option_key, param_key in param_option_mapping.items():
if option_key in self.options:
if param_key == 'speed':
# Ensure speed is within reasonable bounds
speed_val = float(self.options[option_key])
if 0.5 <= speed_val <= 2.0:
generation_params[param_key] = speed_val
return generation_params
async def serve(address):
# Start asyncio gRPC server
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
# Add the servicer to the server
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
# Bind the server to the address
server.add_insecure_port(address)
# Gracefully shutdown the server on SIGTERM or SIGINT
loop = asyncio.get_event_loop()
for sig in (signal.SIGINT, signal.SIGTERM):
loop.add_signal_handler(
sig, lambda: asyncio.ensure_future(server.stop(5))
)
# Start the server
await server.start()
print("MLX-Audio TTS Server started. Listening on: " + address, file=sys.stderr)
# Wait for the server to be terminated
await server.wait_for_termination()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the MLX-Audio TTS gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()
asyncio.run(serve(args.addr))

View File

@@ -1,14 +0,0 @@
#!/bin/bash
set -e
USE_PIP=true
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
installRequirements

View File

@@ -1 +0,0 @@
git+https://github.com/Blaizzy/mlx-audio

View File

@@ -1,7 +0,0 @@
grpcio==1.71.0
protobuf
certifi
setuptools
mlx-audio
soundfile
numpy

View File

@@ -1,11 +0,0 @@
#!/bin/bash
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
startBackend $@

View File

@@ -1,142 +0,0 @@
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc
import grpc
import unittest
import subprocess
import time
import grpc
import backend_pb2_grpc
import backend_pb2
class TestBackendServicer(unittest.TestCase):
"""
TestBackendServicer is the class that tests the gRPC service.
This class contains methods to test the startup and shutdown of the gRPC service.
"""
def setUp(self):
self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
time.sleep(10)
def tearDown(self) -> None:
self.service.terminate()
self.service.wait()
def test_server_startup(self):
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.Health(backend_pb2.HealthMessage())
self.assertEqual(response.message, b'OK')
except Exception as err:
print(err)
self.fail("Server failed to start")
finally:
self.tearDown()
def test_load_model(self):
"""
This method tests if the TTS model is loaded successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="mlx-community/Kokoro-82M-4bit"))
self.assertTrue(response.success)
self.assertEqual(response.message, "MLX-Audio TTS model loaded successfully")
except Exception as err:
print(err)
self.fail("LoadModel service failed")
finally:
self.tearDown()
def test_tts_generation(self):
"""
This method tests if TTS audio is generated successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="mlx-community/Kokoro-82M-4bit"))
self.assertTrue(response.success)
# Test TTS generation
tts_req = backend_pb2.TTSRequest(
text="Hello, this is a test of the MLX-Audio TTS system.",
model="mlx-community/Kokoro-82M-4bit",
voice="af_heart",
language="a"
)
tts_resp = stub.TTS(tts_req)
self.assertTrue(tts_resp.success)
self.assertIn("TTS audio generated successfully", tts_resp.message)
except Exception as err:
print(err)
self.fail("TTS service failed")
finally:
self.tearDown()
def test_tts_with_options(self):
"""
This method tests if TTS works with various options and parameters
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(
Model="mlx-community/Kokoro-82M-4bit",
Options=["voice:af_soft", "speed:1.2", "lang_code:b"]
))
self.assertTrue(response.success)
# Test TTS generation with different voice and language
tts_req = backend_pb2.TTSRequest(
text="Hello, this is a test with British English accent.",
model="mlx-community/Kokoro-82M-4bit",
voice="af_soft",
language="b"
)
tts_resp = stub.TTS(tts_req)
self.assertTrue(tts_resp.success)
self.assertIn("TTS audio generated successfully", tts_resp.message)
except Exception as err:
print(err)
self.fail("TTS with options service failed")
finally:
self.tearDown()
def test_tts_multilingual(self):
"""
This method tests if TTS works with different languages
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="mlx-community/Kokoro-82M-4bit"))
self.assertTrue(response.success)
# Test Spanish TTS
tts_req = backend_pb2.TTSRequest(
text="Hola, esto es una prueba del sistema TTS MLX-Audio.",
model="mlx-community/Kokoro-82M-4bit",
voice="af_heart",
language="e"
)
tts_resp = stub.TTS(tts_req)
self.assertTrue(tts_resp.success)
self.assertIn("TTS audio generated successfully", tts_resp.message)
except Exception as err:
print(err)
self.fail("Multilingual TTS service failed")
finally:
self.tearDown()

View File

@@ -1,12 +0,0 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
runUnittests

Some files were not shown because too many files have changed in this diff Show More