test CI (remove me)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
chore(deps): switch to ubuntu 24.04
2026-02-05 04:02:45 -05:00 · 2025-07-04 18:49:15 +02:00 · 2025-07-04 18:48:53 +02:00
196 changed files with 2759 additions and 4360 deletions
--- a/.devcontainer-scripts/poststart.sh
+++ b/.devcontainer-scripts/poststart.sh
@@ -2,6 +2,9 @@

 cd /workspace

+# Grab the pre-stashed backend assets to avoid build issues
+cp -r /build/backend-assets /workspace/backend-assets
+
 # Ensures generated source files are present upon load
 make prepare

--- a/.devcontainer/docker-compose-devcontainer.yml
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -4,6 +4,9 @@ services:
      context: ..
      dockerfile: Dockerfile
      target: devcontainer
+      args:
+      - FFMPEG=true
+      - GO_TAGS=p2p tts
    env_file:
      - ../.env
    ports:
--- a/.dockerignore
+++ b/.dockerignore
@@ -3,9 +3,7 @@
 .vscode
 .devcontainer
 models
-backends
 examples/chatbot-ui/models
-backend/go/image/stablediffusion-ggml/build/
 examples/rwkv/models
 examples/**/models
 Dockerfile*
@@ -16,4 +14,4 @@ __pycache__

 # backend virtual environments
 **/venv
-backend/python/**/source
+backend/python/**/source
--- a/.env
+++ b/.env
@@ -41,6 +41,13 @@
 ## Uncomment and set to true to enable rebuilding from source
 # REBUILD=true

+## Enable Go build tags. Available tags: p2p, tts
+## p2p: enables distributed inference
+## tts: enables text-to-speech with go-piper
+## (requires REBUILD=true)
+#
+# GO_TAGS=p2p
+
 ## Path where to store generated images
 # LOCALAI_IMAGE_PATH=/tmp/generated/images

--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -3,20 +3,15 @@ set -xe
 REPO=$1
 BRANCH=$2
 VAR=$3
-FILE=$4
-
-if [ -z "$FILE" ]; then
-    FILE="Makefile"
-fi

 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")

 # Read $VAR from Makefile (only first match)
 set +e
-CURRENT_COMMIT="$(grep -m1 "^$VAR?=" $FILE | cut -d'=' -f2)"
+CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
 set -e

-sed -i $FILE -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
+sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"

 if [ -z "$CURRENT_COMMIT" ]; then
    echo "Could not find $VAR in Makefile."
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
--- a/.github/workflows/backend_build.yml
+++ b/.github/workflows/backend_build.yml
@@ -49,10 +49,6 @@ on:
        description: 'Build Dockerfile'
        required: true
        type: string
-      skip-drivers:
-        description: 'Skip drivers'
-        default: 'false'
-        type: string
    secrets:
      dockerUsername:
        required: true
@@ -64,7 +60,7 @@ on:
        required: true

 jobs:
-  backend-build:
+  reusable_python_backend-build:
    runs-on: ${{ inputs.runs-on }}
    steps:

@@ -201,13 +197,12 @@ jobs:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
-            SKIP_DRIVERS=${{ inputs.skip-drivers }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            BASE_IMAGE=${{ inputs.base-image }}
            BACKEND=${{ inputs.backend }}
-          context: ${{ inputs.context }}
-          file: ${{ inputs.dockerfile }}
+          context: ./backend
+          file: ./backend/Dockerfile.python
          cache-from: type=gha
          platforms: ${{ inputs.platforms }}
          push: ${{ github.event_name != 'pull_request' }}
@@ -221,13 +216,12 @@ jobs:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
-            SKIP_DRIVERS=${{ inputs.skip-drivers }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            BASE_IMAGE=${{ inputs.base-image }}
            BACKEND=${{ inputs.backend }}
-          context: ${{ inputs.context }}
-          file: ${{ inputs.dockerfile }}
+          context: ./backend
+          file: ./backend/Dockerfile.python
          cache-from: type=gha
          platforms: ${{ inputs.platforms }}
          push: true
--- a/.github/workflows/build-test.yaml
+++ b/.github/workflows/build-test.yaml
@@ -1,23 +0,0 @@
-name: Build test
-
-on:
-  push:
-    branches:
-      - master
-  pull_request:
-
-jobs:
-  build-test:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Set up Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.23
-      - name: Run GoReleaser
-        run: |
-          make dev-dist
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -10,32 +10,30 @@ jobs:
      matrix:
        include:
          - repository: "ggml-org/llama.cpp"
-            variable: "LLAMA_VERSION"
+            variable: "CPPLLAMA_VERSION"
            branch: "master"
-            file: "backend/cpp/llama-cpp/Makefile"
          - repository: "ggml-org/whisper.cpp"
            variable: "WHISPER_CPP_VERSION"
            branch: "master"
-            file: "backend/go/whisper/Makefile"
          - repository: "PABannier/bark.cpp"
            variable: "BARKCPP_VERSION"
            branch: "main"
-            file: "Makefile"
-          - repository: "richiejp/stable-diffusion.cpp"
+          - repository: "leejet/stable-diffusion.cpp"
            variable: "STABLEDIFFUSION_GGML_VERSION"
            branch: "master"
-            file: "backend/go/stablediffusion-ggml/Makefile"
+          - repository: "mudler/go-stable-diffusion"
+            variable: "STABLEDIFFUSION_VERSION"
+            branch: "master"
          - repository: "mudler/go-piper"
            variable: "PIPER_VERSION"
            branch: "master"
-            file: "backend/go/piper/Makefile"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Bump dependencies 🔧
        id: bump
        run: |
-          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} ${{ matrix.file }}
+          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
          {
            echo 'message<<EOF'
            cat "${{ matrix.variable }}_message.txt"
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -20,6 +20,7 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install -y pip wget
+          sudo pip install --upgrade pip
          pip install huggingface_hub
      - name: 'Setup yq'
        uses: dcarbone/install-yq-action@v1.3.1
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -31,7 +31,7 @@ jobs:
          make protogen-go
      - name: Build api
        run: |
-          CGO_ENABLED=0 make build
+          CGO_ENABLED=0 make build-api
      - name: rm
        uses: appleboy/ssh-action@v1.2.2
        with:
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -16,7 +16,7 @@ jobs:
    strategy:
      matrix:
        include:
-          - grpc-base-image: ubuntu:22.04
+          - grpc-base-image: ubuntu:24.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64,linux/arm64'
    runs-on: ${{matrix.runs-on}}
--- a/.github/workflows/generate_intel_image.yaml
+++ b/.github/workflows/generate_intel_image.yaml
@@ -15,7 +15,7 @@ jobs:
    strategy:
      matrix:
        include:
-          - base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
+          - base-image: intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
    runs-on: ${{matrix.runs-on}}
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -14,6 +14,7 @@ jobs:
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -39,30 +40,34 @@ jobs:
            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            tag-suffix: '-gpu-nvidia-cuda12'
+            tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
+            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'hipblas'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-hipblas'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
+            ffmpeg: 'false'
+            base-image: "rocm/dev-ubuntu-24.04:6.4.1"
+            grpc-base-image: "ubuntu:24.04"
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: 'sycl-f16'
+            grpc-base-image: "ubuntu:24.04"
+            tag-suffix: 'sycl-f16-ffmpeg'
+            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            tag-suffix: '-vulkan-core'
+            tag-suffix: '-vulkan-ffmpeg-core'
+            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
            makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -18,6 +18,7 @@ jobs:
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -39,8 +40,9 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-hipblas'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
+            ffmpeg: 'true'
+            base-image: "rocm/dev-ubuntu-24.04:6.4.1"
+            grpc-base-image: "ubuntu:24.04"
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
            aio: "-aio-gpu-hipblas"
@@ -50,6 +52,7 @@ jobs:
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -73,7 +76,8 @@ jobs:
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: ''
-            base-image: "ubuntu:22.04"
+            ffmpeg: 'true'
+            base-image: "ubuntu:24.04"
            runs-on: 'ubuntu-latest'
            aio: "-aio-cpu"
            makeflags: "--jobs=4 --output-sync=target"
@@ -84,8 +88,9 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda11'
+            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
            makeflags: "--jobs=4 --output-sync=target"
            skip-drivers: 'false'
            aio: "-aio-gpu-nvidia-cuda-11"
@@ -95,8 +100,9 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-gpu-nvidia-cuda12'
+            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
            aio: "-aio-gpu-nvidia-cuda-12"
@@ -104,8 +110,9 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-vulkan'
+            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
            aio: "-aio-gpu-vulkan"
@@ -113,8 +120,9 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
+            grpc-base-image: "ubuntu:24.04"
            tag-suffix: '-gpu-intel-f16'
+            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
            aio: "-aio-gpu-intel-f16"
@@ -122,8 +130,9 @@ jobs:
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
+            grpc-base-image: "ubuntu:24.04"
            tag-suffix: '-gpu-intel-f32'
+            ffmpeg: 'true'
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=3 --output-sync=target"
            aio: "-aio-gpu-intel-f32"
@@ -133,6 +142,7 @@ jobs:
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -157,6 +167,7 @@ jobs:
            platforms: 'linux/arm64'
            tag-latest: 'auto'
            tag-suffix: '-nvidia-l4t-arm64'
+            ffmpeg: 'true'
            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
            runs-on: 'ubuntu-24.04-arm'
            makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -37,6 +37,10 @@ on:
        description: 'Tag suffix'
        default: ''
        type: string
+      ffmpeg:
+        description: 'FFMPEG'
+        default: ''
+        type: string
      skip-drivers:
        description: 'Skip drivers by default'
        default: 'false'
@@ -232,6 +236,7 @@ jobs:
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
+            FFMPEG=${{ inputs.ffmpeg }}
            BASE_IMAGE=${{ inputs.base-image }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
@@ -259,6 +264,7 @@ jobs:
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
+            FFMPEG=${{ inputs.ffmpeg }}
            BASE_IMAGE=${{ inputs.base-image }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
--- a/.github/workflows/notify-models.yaml
+++ b/.github/workflows/notify-models.yaml
@@ -96,7 +96,7 @@ jobs:
    - name: Start LocalAI
      run: |
        echo "Starting LocalAI..."
-        docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
+        docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
        until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready";  docker logs --tail 10 local-ai; sleep 2; done
      # Check the PR diff using the current branch and the base branch of the PR
    - uses: GrantBirki/git-diff-action@v2.8.1
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,26 +1,375 @@
-name: goreleaser
+name: Build and Release

 on:
  push:
+    branches:
+      - master
    tags:
      - 'v*'
+  pull_request:
+
+env:
+  GRPC_VERSION: v1.65.0
+
+permissions:
+  contents: write
+
+concurrency:
+  group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true

 jobs:
-  goreleaser:
+
+  build-linux-arm:
    runs-on: ubuntu-latest
    steps:
-      - name: Checkout
+      - name: Clone
        uses: actions/checkout@v4
        with:
-          fetch-depth: 0
-      - name: Set up Go
-        uses: actions/setup-go@v5
+          submodules: true
+      - uses: actions/setup-go@v5
        with:
-          go-version: 1.23
-      - name: Run GoReleaser
-        uses: goreleaser/goreleaser-action@v6
-        with:
-          version: v2.11.0
-          args: release --clean
+          go-version: '1.21.x'
+          cache: false
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
+          sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
+          make install-go-tools
+      - name: Install CUDA Dependencies
+        run: |
+          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 
+          CUDA_VERSION: 12-4
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v4
+        with:
+          path: grpc
+          key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
+
+          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          cd grpc && sed -i "216i\  TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
+          cd cmake/build && cmake -DgRPC_INSTALL=ON \
+            -DgRPC_BUILD_TESTS=OFF \
+            ../.. && sudo make --jobs 5 --output-sync=target
+      - name: Install gRPC
+        run: |
+          GNU_HOST=aarch64-linux-gnu
+          C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
+          CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
+
+          CROSS_TOOLCHAIN=/usr/$GNU_HOST
+          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
+          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
+
+          # https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
+          echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
+            echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
+          GRPC_DIR=$PWD/grpc
+          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
+          GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
+          mkdir -p $GRPC_CROSS_BUILD_DIR && \
+          cd $GRPC_CROSS_BUILD_DIR && \
+          cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
+            ../.. && \
+          sudo make -j`nproc` install
+      - name: Build
+        id: build
+        run: |
+          GNU_HOST=aarch64-linux-gnu
+          C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
+          CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
+
+          CROSS_TOOLCHAIN=/usr/$GNU_HOST
+          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
+          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          export PATH=$PATH:$GOPATH/bin
+          export PATH=/usr/local/cuda/bin:$PATH
+          sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
+          sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
+          sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
+          BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
+          GOOS=linux \
+          GOARCH=arm64 \
+          CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
+      - uses: actions/upload-artifact@v4
+        with:
+          name: LocalAI-linux-arm64
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.22
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
+  build-linux:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # setting this to "true" frees about 6 GB,
+          # but might remove tools that are actually needed
+          tool-cache: true
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: true
+          swap-storage: true
+
+      - name: Release space from worker
+        run: |
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          df -h
+          echo
+          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
+          sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
+          sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
+          sudo rm -rf /usr/local/lib/android
+          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
+          sudo rm -rf /usr/share/dotnet
+          sudo apt-get remove -y '^mono-.*' || true
+          sudo apt-get remove -y '^ghc-.*' || true
+          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
+          sudo apt-get remove -y 'php.*' || true
+          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
+          sudo apt-get remove -y '^google-.*' || true
+          sudo apt-get remove -y azure-cli || true
+          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
+          sudo apt-get remove -y '^gfortran-.*' || true
+          sudo apt-get remove -y microsoft-edge-stable || true
+          sudo apt-get remove -y firefox || true
+          sudo apt-get remove -y powershell || true
+          sudo apt-get remove -y r-base-core || true
+          sudo apt-get autoremove -y
+          sudo apt-get clean
+          echo
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          sudo rm -rfv build || true
+          sudo rm -rf /usr/share/dotnet || true
+          sudo rm -rf /opt/ghc || true
+          sudo rm -rf "/usr/local/share/boost" || true
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
+          df -h
+
+      - name: Force Install GIT latest
+        run: |
+          sudo apt-get update \
+          && sudo apt-get install -y software-properties-common \
+          && sudo apt-get update \
+          && sudo add-apt-repository -y ppa:git-core/ppa \
+          && sudo apt-get update \
+          && sudo apt-get install -y git
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
+          make install-go-tools
+      - name: Intel Dependencies
+        run: |
+          wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
+          echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
+          sudo apt update
+          sudo apt install -y intel-basekit
+      - name: Install CUDA Dependencies
+        run: |
+          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+        env:
+          CUDA_VERSION: 12-5
+      - name: "Install Hipblas"
+        env:
+          ROCM_VERSION: "6.1"
+          AMDGPU_VERSION: "6.1"
+        run: |
+            set -ex
+
+            sudo apt-get update
+            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
+
+            sudo apt update
+            wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
+            sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
+            sudo apt update
+
+            sudo amdgpu-install --usecase=rocm
+
+            sudo apt-get clean
+            sudo rm -rf /var/lib/apt/lists/*
+            sudo ldconfig
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v4
+        with:
+          path: grpc
+          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
+          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          cd grpc && sed -i "216i\  TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
+          cd cmake/build && cmake -DgRPC_INSTALL=ON \
+            -DgRPC_BUILD_TESTS=OFF \
+            ../.. && sudo make --jobs 5 --output-sync=target
+      - name: Install gRPC
+        run: |
+          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
+      # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
+      - name: Build
+        id: build
+        run: |
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          export PATH=$PATH:$GOPATH/bin
+          export PATH=/usr/local/cuda/bin:$PATH
+          export PATH=/opt/rocm/bin:$PATH
+          source /opt/intel/oneapi/setvars.sh
+          sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
+          BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
+          make -j4 dist
+      - uses: actions/upload-artifact@v4
+        with:
+          name: LocalAI-linux
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.22
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
+
+
+  build-macOS-x86_64:
+    runs-on: macos-13
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
+      - name: Dependencies
+        run: |
+          brew install protobuf grpc
+          make install-go-tools
+      - name: Build
+        id: build
+        run: |
+          export C_INCLUDE_PATH=/usr/local/include
+          export CPLUS_INCLUDE_PATH=/usr/local/include
+          export PATH=$PATH:$GOPATH/bin
+          export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
+          make dist
+      - uses: actions/upload-artifact@v4
+        with:
+          name: LocalAI-MacOS-x86_64
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.22
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
+
+  build-macOS-arm64:
+    runs-on: macos-14
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
+      - name: Dependencies
+        run: |
+          brew install protobuf grpc libomp llvm
+          make install-go-tools
+      - name: Build
+        id: build
+        run: |
+          export C_INCLUDE_PATH=/usr/local/include
+          export CPLUS_INCLUDE_PATH=/usr/local/include
+          export PATH=$PATH:$GOPATH/bin
+          export CC=/opt/homebrew/opt/llvm/bin/clang
+          make dist
+      - uses: actions/upload-artifact@v4
+        with:
+          name: LocalAI-MacOS-arm64
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.22
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -18,7 +18,7 @@ jobs:
        if: ${{ github.actor != 'dependabot[bot]' }}
      - name: Run Gosec Security Scanner
        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@v2.22.7
+        uses: securego/gosec@v2.22.5
        with:
          # we let the report trigger content trigger a failure using the GitHub Security features.
          args: '-no-fail -fmt sarif -out results.sarif ./...'
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -67,20 +67,18 @@ jobs:
      # You can test your matrix by printing the current Go version
      - name: Display Go version
        run: go version
-      - name: Proto Dependencies
-        run: |
-          # Install protoc
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-          rm protoc.zip
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          PATH="$PATH:$HOME/go/bin" make protogen-go
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
          sudo apt-get install -y libgmock-dev clang
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+             gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+             sudo apt-get update && \
+             sudo apt-get install -y conda
          # Install UV
          curl -LsSf https://astral.sh/uv/install.sh | sh
          sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
@@ -96,15 +94,38 @@ jobs:
          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
          export CUDACXX=/usr/local/cuda/bin/nvcc

+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install github.com/GeertJohan/go.rice/rice@latest

          # The python3-grpc-tools package in 22.04 is too old
          pip install --user grpcio-tools==1.71.0 grpcio==1.71.0

          make -C backend/python/transformers

-          make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
+          # Pre-build piper before we start tests in order to have shared libraries in place
+          make sources/go-piper && \
+          GO_TAGS="tts" make -C sources/go-piper piper.o && \
+          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
        env:
          CUDA_VERSION: 12-4
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v4
+        with:
+          path: grpc
+          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
+          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
+          cd grpc && sed -i "216i\  TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
+          cmake -DgRPC_INSTALL=ON \
+            -DgRPC_BUILD_TESTS=OFF \
+            ../.. && sudo make --jobs 5
+      - name: Install gRPC
+        run: |
+          cd grpc && cd cmake/build && sudo make --jobs 5 install
      - name: Test
        run: |
          PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
@@ -163,10 +184,16 @@ jobs:
          rm protoc.zip
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install github.com/GeertJohan/go.rice/rice@latest
          PATH="$PATH:$HOME/go/bin" make protogen-go
+      - name: Build images
+        run: |
+          docker build --build-arg FFMPEG=true --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
+          BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
      - name: Test
        run: |
-            PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
+            PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
+            make run-e2e-aio
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.22
@@ -197,14 +224,7 @@ jobs:
        run: |
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
          pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
-      - name: Build llama-cpp-darwin
-        run: |
-          make protogen-go
-          make build
-          bash scripts/build-llama-cpp-darwin.sh
-          ls -la build/darwin.tar
-          mv build/darwin.tar build/llama-cpp.tar
-          ./local-ai backends install "ocifile://$PWD/build/llama-cpp.tar"
+          go install github.com/GeertJohan/go.rice/rice@latest
      - name: Test
        run: |
          export C_INCLUDE_PATH=/usr/local/include
@@ -212,8 +232,7 @@ jobs:
          export CC=/opt/homebrew/opt/llvm/bin/clang
          # Used to run the newer GNUMake version from brew that supports --output-sync
          export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-          PATH="$PATH:$HOME/go/bin" make protogen-go
-          PATH="$PATH:$HOME/go/bin" BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
+          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.22
--- a/.gitignore
+++ b/.gitignore
@@ -5,13 +5,9 @@ __pycache__/
 *.o
 get-sources
 prepare-sources
-/backend/cpp/llama-cpp/grpc-server
-/backend/cpp/llama-cpp/llama.cpp
+/backend/cpp/llama/grpc-server
+/backend/cpp/llama/llama.cpp
 /backend/cpp/llama-*
-!backend/cpp/llama-cpp
-/backends
-/backend-images
-/result.yaml

 *.log

@@ -60,4 +56,4 @@ docs/static/gallery.html
 **/venv

 # per-developer customization files for the development container
-.devcontainer/customization/*
+.devcontainer/customization/*
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -1,33 +0,0 @@
-version: 2
-before:
-  hooks:
-    - make protogen-go
-    - go mod tidy
-dist: release
-source:
-  enabled: true
-  name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
-builds:
-  -
-    env:
-      - CGO_ENABLED=0
-    ldflags:
-      - -s -w
-      - -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}"
-      - -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}"
-    goos:
-      - linux
-      - darwin
-      #- windows
-    goarch:
-      - amd64
-      - arm64
-archives:
-  - formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone
-    name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}
-checksum:
-  name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt'
-snapshot:
-  version_template: "{{ .Tag }}-next"
-changelog:
-  use: github-native
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -26,7 +26,7 @@
                "LOCALAI_P2P": "true",
                "LOCALAI_FEDERATED": "true"
            },
-            "buildFlags": ["-tags", "", "-v"],
+            "buildFlags": ["-tags", "p2p tts", "-v"],
            "envFile": "${workspaceFolder}/.env",
            "cwd": "${workspaceRoot}"
        }
--- a/89
+++ b/89
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 ARG INTEL_BASE_IMAGE=${BASE_IMAGE}

@@ -25,7 +25,6 @@ ARG TARGETVARIANT
 ENV BUILD_TYPE=${BUILD_TYPE}

 RUN mkdir -p /run/localai
-RUN echo "default" > /run/localai/capability

 # Vulkan requirements
 RUN <<EOT bash
@@ -127,7 +126,7 @@ RUN apt-get update && \

 # Install CMake (the version in 22.04 is too old)
 RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
+    if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
    else
        apt-get update && \
@@ -142,9 +141,10 @@ EOT
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
 ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin

-# Install grpc compilers
+# Install grpc compilers and rice
 RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
-    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
+    go install github.com/GeertJohan/go.rice/rice@latest

 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
@@ -180,12 +180,57 @@ FROM ${INTEL_BASE_IMAGE} AS intel
 RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
 gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
 RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
+
+###################################
+###################################
+
+# The grpc target does one thing, it builds and installs GRPC.  This is in its own layer so that it can be effectively cached by CI.
+# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
+FROM ${GRPC_BASE_IMAGE} AS grpc
+
+# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
+ARG GRPC_MAKEFLAGS="-j4 -Otarget"
+ARG GRPC_VERSION=v1.65.0
+ARG CMAKE_FROM_SOURCE=false
+ARG CMAKE_VERSION=3.26.4
+
+ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
+
+WORKDIR /build
+
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        intel-oneapi-runtime-libs && \
+        ca-certificates \
+        build-essential curl libssl-dev \
+        git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

+# Install CMake (the version in 22.04 is too old)
+RUN <<EOT bash
+    if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
+        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
+    else
+        apt-get update && \
+        apt-get install -y \
+            cmake && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+    fi
+EOT
+
+# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
+# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
+# and running make install in the target container
+RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+    mkdir -p /build/grpc/cmake/build && \
+    cd /build/grpc/cmake/build && \
+    sed -i "216i\  TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
+    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
+    make && \
+    make install && \
+    rm -rf /build
+
 ###################################
 ###################################

@@ -193,7 +238,7 @@ RUN apt-get update && \

 FROM build-requirements AS builder-base

-ARG GO_TAGS=""
+ARG GO_TAGS="tts p2p"
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
 ARG LD_FLAGS="-s -w"
@@ -212,7 +257,9 @@ RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
 WORKDIR /build


-# We need protoc installed, and the version in 22.04 is too old.
+# We need protoc installed, and the version in 22.04 is too old.  We will create one as part of installing the GRPC build below
+# but that will also bring in a newer version of absl which stablediffusion cannot compile with.  This version of protoc is only
+# here so that we can generate the grpc code for the stablediffusion build
 RUN <<EOT bash
    if [ "amd64" = "$TARGETARCH" ]; then
        curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
@@ -234,6 +281,8 @@ FROM builder-base AS builder-backends
 ARG TARGETARCH
 ARG TARGETVARIANT

+COPY --from=grpc /opt/grpc /usr/local
+
 WORKDIR /build

 COPY ./Makefile .
@@ -248,7 +297,13 @@ COPY ./pkg/utils ./pkg/utils
 COPY ./pkg/langchain ./pkg/langchain

 RUN ls -l ./
-RUN make protogen-go
+RUN make backend-assets
+RUN make prepare
+RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
+        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make grpcs; \
+    else \
+        make grpcs; \
+    fi

 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
 # Adjustments to the build process should likely be made here.
@@ -261,7 +316,16 @@ COPY . .
 ## Build the binary
 ## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
 ## Otherwise just run the normal build
-RUN make build
+RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
+        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
+    else \
+        make build; \
+    fi
+
+RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
+        mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ && \
+        touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
+    ; fi

 ###################################
 ###################################
@@ -271,6 +335,8 @@ RUN make build

 FROM builder-base AS devcontainer

+COPY --from=grpc /opt/grpc /usr/local
+
 COPY .devcontainer-scripts /.devcontainer-scripts

 RUN apt-get update && \
@@ -303,6 +369,9 @@ COPY ./entrypoint.sh .
 # Copy the binary
 COPY --from=builder /build/local-ai ./

+# Copy shared libraries for piper
+COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
+
 # Make sure the models directory exists
 RUN mkdir -p /models /backends

--- a/Dockerfile.aio
+++ b/Dockerfile.aio
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04

 FROM ${BASE_IMAGE} 

--- a/5
+++ b/5
@@ -0,0 +1,5 @@
+VERSION 0.7
+
+build:
+    FROM DOCKERFILE -f Dockerfile .
+    SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
--- a/754
+++ b/754
@@ -3,12 +3,47 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai

-GORELEASER?=
+DETECT_LIBS?=true

-ONEAPI_VERSION?=2025.2
+# llama.cpp versions
+CPPLLAMA_VERSION?=bee28421be25fd447f61cb6db64d556cbfce32ec
+
+# whisper.cpp version
+WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
+WHISPER_CPP_VERSION?=d9999d54c868b8bfcd376aa26067e787d53e679e
+
+# go-piper version
+PIPER_REPO?=https://github.com/mudler/go-piper
+PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
+
+# bark.cpp
+BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
+BARKCPP_VERSION?=5d5be84f089ab9ea53b7a793f088d3fbf7247495
+
+# stablediffusion.cpp (ggml)
+STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
+STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
+
+# ONEAPI variables for SYCL
+export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
+
+ONNX_VERSION?=1.20.0
+ONNX_ARCH?=x64
+ONNX_OS?=linux

 export BUILD_TYPE?=
+export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
+export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
+export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
+export BACKEND_LIBS?=
+export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
+export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
+export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src

+CGO_LDFLAGS?=
+CGO_LDFLAGS_WHISPER?=
+CGO_LDFLAGS_WHISPER+=-lggml
+CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
 BUILD_ID?=
 NATIVE?=false
@@ -35,6 +70,14 @@ WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)

+UPX?=
+# check if upx exists
+ifeq (, $(shell which upx))
+	UPX=
+else
+	UPX=$(shell which upx)
+endif
+
 # Default Docker bridge IP
 E2E_BRIDGE_IP?=172.17.0.1

@@ -42,69 +85,368 @@ ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif

+# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
+ifeq ($(NATIVE),false)
+	CMAKE_ARGS+=-DGGML_NATIVE=OFF
+	WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
+endif
+
+# Detect if we are running on arm64
+ifneq (,$(findstring aarch64,$(shell uname -m)))
+	ONNX_ARCH=aarch64
+endif
+
 ifeq ($(OS),Darwin)
+	ONNX_OS=osx
+	ifneq (,$(findstring aarch64,$(shell uname -m)))
+		ONNX_ARCH=arm64
+	else ifneq (,$(findstring arm64,$(shell uname -m)))
+		ONNX_ARCH=arm64
+	else
+		ONNX_ARCH=x86_64
+	endif
+
 	ifeq ($(OSX_SIGNING_IDENTITY),)
 		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
 	endif
-endif

-# check if goreleaser exists
-ifeq (, $(shell which goreleaser))
-	GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s --
+	# on OSX, if BUILD_TYPE is blank, we should default to use Metal
+	ifeq ($(BUILD_TYPE),)
+		BUILD_TYPE=metal
+	# disable metal if on Darwin and any other value is explicitly passed.
+	else ifneq ($(BUILD_TYPE),metal)
+		CMAKE_ARGS+=-DGGML_METAL=OFF
+		WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
+		export GGML_NO_ACCELERATE=1
+		export GGML_NO_METAL=1
+		CGO_LDFLAGS_WHISPER+=-lggml-blas
+		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
+	endif
+
+	ifeq ($(BUILD_TYPE),metal)
+		CGO_LDFLAGS += -framework Accelerate
+		CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
+		CMAKE_ARGS+=-DGGML_METAL=ON
+		CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
+		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
+		CMAKE_ARGS+=-DGGML_OPENMP=OFF
+		WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
+		WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
+		WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
+		WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
+		WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
+		WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
+		WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
+		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
+	else
+		CGO_LDFLAGS_WHISPER+=-lggml-blas
+		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
+	endif
 else
-	GORELEASER=$(shell which goreleaser)
+CGO_LDFLAGS_WHISPER+=-lgomp
 endif

+ifeq ($(BUILD_TYPE),openblas)
+	CGO_LDFLAGS+=-lopenblas
+	export GGML_OPENBLAS=1
+endif
+
+ifeq ($(BUILD_TYPE),cublas)
+	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
+	export GGML_CUDA=1
+	CMAKE_ARGS+=-DGGML_CUDA=ON
+	WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
+	CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
+	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
+endif
+
+ifeq ($(BUILD_TYPE),vulkan)
+	CMAKE_ARGS+=-DGGML_VULKAN=1
+	WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
+	CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
+	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
+endif
+
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+	export GGML_SYCL=1
+	CMAKE_ARGS+=-DGGML_SYCL=ON
+endif
+
+ifeq ($(BUILD_TYPE),sycl_f16)
+	export GGML_SYCL_F16=1
+	CMAKE_ARGS+=-DGGML_SYCL_F16=ON
+endif
+
+ifeq ($(BUILD_TYPE),hipblas)
+	ROCM_HOME ?= /opt/rocm
+	ROCM_PATH ?= /opt/rocm
+	LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
+	export CXX=$(ROCM_HOME)/llvm/bin/clang++
+	export CC=$(ROCM_HOME)/llvm/bin/clang
+	export STABLE_BUILD_TYPE=
+	export GGML_HIP=1
+	GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
+	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
+	CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
+endif
+
+ifeq ($(BUILD_TYPE),metal)
+	CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
+	export GGML_METAL=1
+endif
+
+ifeq ($(BUILD_TYPE),clblas)
+	CGO_LDFLAGS+=-lOpenCL -lclblast
+	export GGML_OPENBLAS=1
+endif
+
+# glibc-static or glibc-devel-static required
+ifeq ($(STATIC),true)
+	LD_FLAGS+=-linkmode external -extldflags -static
+endif
+
+ifeq ($(findstring tts,$(GO_TAGS)),tts)
+#	OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
+#	OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
+	PIPER_CGO_CXXFLAGS+=-I$(CURDIR)/sources/go-piper/piper/src/cpp -I$(CURDIR)/sources/go-piper/piper/build/fi/include -I$(CURDIR)/sources/go-piper/piper/build/pi/include -I$(CURDIR)/sources/go-piper/piper/build/si/include
+	PIPER_CGO_LDFLAGS+=-L$(CURDIR)/sources/go-piper/piper/build/fi/lib -L$(CURDIR)/sources/go-piper/piper/build/pi/lib -L$(CURDIR)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
+	OPTIONAL_GRPC+=backend-assets/grpc/piper
+endif
+
+ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
+ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
+ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
+
+ifeq ($(ONNX_OS),linux)
+ifeq ($(ONNX_ARCH),x64)
+	ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
+endif
+endif
+
+ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
+ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
+ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
+# Use filter-out to remove the specified backends
+ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
+
+GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
 TEST_PATHS?=./api/... ./pkg/... ./core/...

+# If empty, then we build all
+ifeq ($(GRPC_BACKENDS),)
+	GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
+endif

-.PHONY: all test build vendor
+ifeq ($(BUILD_API_ONLY),true)
+	GRPC_BACKENDS=
+endif
+
+.PHONY: all test build vendor get-sources prepare-sources prepare

 all: help

+## bark.cpp
+sources/bark.cpp:
+	git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
+	cd sources/bark.cpp && \
+	git checkout $(BARKCPP_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/bark.cpp/build/libbark.a: sources/bark.cpp
+	cd sources/bark.cpp && \
+	mkdir -p build && \
+	cd build && \
+	cmake $(CMAKE_ARGS) .. && \
+	cmake --build . --config Release
+
+backend/go/bark-cpp/libbark.a: sources/bark.cpp/build/libbark.a
+	$(MAKE) -C backend/go/bark-cpp libbark.a
+
+## go-piper
+sources/go-piper:
+	mkdir -p sources/go-piper
+	cd sources/go-piper && \
+	git init && \
+	git remote add origin $(PIPER_REPO) && \
+	git fetch origin && \
+	git checkout $(PIPER_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/go-piper/libpiper_binding.a: sources/go-piper
+	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
+
+## stablediffusion (ggml)
+sources/stablediffusion-ggml.cpp:
+	git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
+	cd sources/stablediffusion-ggml.cpp && \
+	git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
+	$(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a
+	$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
+
+backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
+	$(MAKE) -C backend/go/image/stablediffusion-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" stablediffusion-ggml
+
+sources/onnxruntime:
+	mkdir -p sources/onnxruntime
+	curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
+	cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
+	cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
+
+backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
+	cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
+ifeq ($(OS),Darwin)
+	mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
+else
+	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
+endif
+
+## whisper
+sources/whisper.cpp:
+	mkdir -p sources/whisper.cpp
+	cd sources/whisper.cpp && \
+	git init && \
+	git remote add origin $(WHISPER_REPO) && \
+	git fetch origin && \
+	git checkout $(WHISPER_CPP_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
+	cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
+	cd sources/whisper.cpp/build && cmake --build . --config Release
+
+get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
+
+replace:
+	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
+	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
+	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
+
+dropreplace:
+	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
+	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
+	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
+
+prepare-sources: get-sources replace
+	$(GOCMD) mod download
+
 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
+	$(MAKE) -C sources/whisper.cpp clean
+	$(MAKE) -C sources/go-piper clean
 	$(MAKE) build

+prepare: prepare-sources $(OPTIONAL_TARGETS)
+
 clean: ## Remove build related file
 	$(GOCMD) clean -cache
 	rm -f prepare
+	rm -rf ./sources
 	rm -rf $(BINARY_NAME)
 	rm -rf release/
+	rm -rf backend-assets/*
+	$(MAKE) -C backend/cpp/grpc clean
+	$(MAKE) -C backend/go/bark-cpp clean
+	$(MAKE) -C backend/cpp/llama clean
+	$(MAKE) -C backend/go/image/stablediffusion-ggml clean
+	rm -rf backend/cpp/llama-* || true
+	$(MAKE) dropreplace
 	$(MAKE) protogen-clean
 	rmdir pkg/grpc/proto || true

 clean-tests:
 	rm -rf test-models
 	rm -rf test-dir
+	rm -rf core/http/backend-assets
+
+clean-dc: clean
+	cp -r /build/backend-assets /workspace/backend-assets

 ## Install Go tools
 install-go-tools:
 	go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
 	go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+	go install github.com/GeertJohan/go.rice/rice@latest

 ## Build:
-build: protogen-go install-go-tools ## Build the project
+build: prepare backend-assets grpcs install-go-tools ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
 	$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
+ifneq ($(BACKEND_LIBS),)
+	$(MAKE) backend-assets/lib
+	cp -f $(BACKEND_LIBS) backend-assets/lib/
+endif
 	rm -rf $(BINARY_NAME) || true
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
+	rice append --exec $(BINARY_NAME)

-dev-dist:
-	$(GORELEASER) build --snapshot --clean
+build-minimal:
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
+
+build-api:
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
+
+backend-assets/lib:
+	mkdir -p backend-assets/lib

 dist:
-	$(GORELEASER) build --clean
+	$(MAKE) backend-assets/grpc/llama-cpp-avx2
+ifeq ($(DETECT_LIBS),true)
+	scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
+endif
+ifeq ($(OS),Darwin)
+	BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
+else
+	$(MAKE) backend-assets/grpc/llama-cpp-cuda
+	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
+	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
+	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
+endif
+	GO_TAGS="tts p2p" $(MAKE) build
+ifeq ($(DETECT_LIBS),true)
+	scripts/prepare-libs.sh backend-assets/grpc/piper
+endif
+	GO_TAGS="tts p2p" STATIC=true $(MAKE) build
+	mkdir -p release
+# if BUILD_ID is empty, then we don't append it to the binary name
+ifeq ($(BUILD_ID),)
+	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
+	shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
+else
+	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
+	shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
+endif
+
+dist-cross-linux-arm64:
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
+	STATIC=true $(MAKE) build
+	mkdir -p release
+# if BUILD_ID is empty, then we don't append it to the binary name
+ifeq ($(BUILD_ID),)
+	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-arm64
+	shasum -a 256 release/$(BINARY_NAME)-$(OS)-arm64 > release/$(BINARY_NAME)-$(OS)-arm64.sha256
+else
+	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64
+	shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64 > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64.sha256
+endif

 osx-signed: build
 	codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"

 ## Run
-run: ## run local-ai
+run: prepare ## run local-ai
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./

 test-models/testmodel.ggml:
@@ -116,78 +458,35 @@ test-models/testmodel.ggml:
 	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
 	cp tests/models_fixtures/* test-models

-prepare-test: protogen-go
+prepare-test: grpcs
+	cp -rf backend-assets core/http
 	cp tests/models_fixtures/* test-models

-########################################################
-## Tests
-########################################################
-
 ## Test targets
-test: test-models/testmodel.ggml protogen-go
+test: prepare test-models/testmodel.ggml grpcs
 	@echo 'Running tests'
-	export GO_TAGS="debug"
+	export GO_TAGS="tts debug"
 	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
+	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf"  --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
 	$(MAKE) test-llama-gguf
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion

-backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
-
-backends/piper: docker-build-piper docker-save-piper build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
-
-backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
-
-backends/whisper: docker-build-whisper docker-save-whisper build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
-	
-backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
-
-backends/local-store: docker-build-local-store docker-save-local-store build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
-
-backends/huggingface: docker-build-huggingface docker-save-huggingface build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
-
-########################################################
-## AIO tests
-########################################################
-
-docker-build-aio:
-	docker build --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
-	BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
-
-e2e-aio:
-	LOCALAI_BACKEND_DIR=$(abspath ./backends) \
-	LOCALAI_MODELS_DIR=$(abspath ./models) \
-	LOCALAI_IMAGE_TAG=test \
-	LOCALAI_IMAGE=local-ai-aio \
-	$(MAKE) run-e2e-aio
-
-run-e2e-aio: protogen-go
-	@echo 'Running e2e AIO tests'
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
-
-########################################################
-## E2E tests
-########################################################
-
 prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .

 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
 	docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests

+run-e2e-aio: protogen-go
+	@echo 'Running e2e AIO tests'
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
+
 test-e2e:
 	@echo 'Running e2e tests'
 	BUILD_TYPE=$(BUILD_TYPE) \
@@ -198,33 +497,27 @@ teardown-e2e:
 	rm -rf $(TEST_DIR) || true
 	docker stop $$(docker ps -q --filter ancestor=localai-tests)

-########################################################
-## Integration and unit tests
-########################################################
-
 test-llama-gguf: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

 test-tts: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

 test-stablediffusion: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

-test-stores:
+test-stores: backend-assets/grpc/local-store
+	mkdir -p tests/integration/backend-assets/grpc
+	cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration

 test-container:
 	docker build --target requirements -t local-ai-test-container .
 	docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container

-########################################################
-## Help
-########################################################
-
 ## Help:
 help: ## Show this help.
 	@echo ''
@@ -237,52 +530,16 @@ help: ## Show this help.
 		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
 		}' $(MAKEFILE_LIST)

-########################################################
-## Backends
-########################################################
-
 .PHONY: protogen
 protogen: protogen-go protogen-python

 .PHONY: protogen-clean
 protogen-clean: protogen-go-clean protogen-python-clean

-protoc:
-	@OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
-	ARCH_NAME=$$(uname -m); \
-	if [ "$$OS_NAME" = "darwin" ]; then \
-	  if [ "$$ARCH_NAME" = "arm64" ]; then \
-	    FILE=protoc-31.1-osx-aarch_64.zip; \
-	  elif [ "$$ARCH_NAME" = "x86_64" ]; then \
-	    FILE=protoc-31.1-osx-x86_64.zip; \
-	  else \
-	    echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \
-	  fi; \
-	elif [ "$$OS_NAME" = "linux" ]; then \
-	  if [ "$$ARCH_NAME" = "x86_64" ]; then \
-	    FILE=protoc-31.1-linux-x86_64.zip; \
-	  elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \
-	    FILE=protoc-31.1-linux-aarch_64.zip; \
-	  elif [ "$$ARCH_NAME" = "ppc64le" ]; then \
-	    FILE=protoc-31.1-linux-ppcle_64.zip; \
-	  elif [ "$$ARCH_NAME" = "s390x" ]; then \
-	    FILE=protoc-31.1-linux-s390_64.zip; \
-	  elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \
-	    FILE=protoc-31.1-linux-x86_32.zip; \
-	  else \
-	    echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \
-	  fi; \
-	else \
-	  echo "Unsupported OS: $$OS_NAME"; exit 1; \
-	fi; \
-	URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \
-	curl -L -s $$URL -o protoc.zip && \
-	unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip
-
 .PHONY: protogen-go
-protogen-go: protoc install-go-tools
+protogen-go: install-go-tools
 	mkdir -p pkg/grpc/proto
-	./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
+	protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
    backend/backend.proto

 .PHONY: protogen-go-clean
@@ -376,6 +633,19 @@ vllm-protogen:
 vllm-protogen-clean:
 	$(MAKE) -C backend/python/vllm protogen-clean

+## GRPC
+# Note: it is duplicated in the Dockerfile
+prepare-extra-conda-environments: protogen-python
+	$(MAKE) -C backend/python/bark
+	$(MAKE) -C backend/python/coqui
+	$(MAKE) -C backend/python/diffusers
+	$(MAKE) -C backend/python/chatterbox
+	$(MAKE) -C backend/python/faster-whisper
+	$(MAKE) -C backend/python/vllm
+	$(MAKE) -C backend/python/rerankers
+	$(MAKE) -C backend/python/transformers
+	$(MAKE) -C backend/python/kokoro
+	$(MAKE) -C backend/python/exllama2

 prepare-test-extra: protogen-python
 	$(MAKE) -C backend/python/transformers
@@ -389,10 +659,165 @@ test-extra: prepare-test-extra
 	$(MAKE) -C backend/python/chatterbox test
 	$(MAKE) -C backend/python/vllm test

+backend-assets:
+	mkdir -p backend-assets
+ifeq ($(BUILD_API_ONLY),true)
+	touch backend-assets/keep
+endif
+
+backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
+	mkdir -p backend-assets/espeak-ng-data
+	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
+
+backend-assets/grpc: protogen-go replace
+	mkdir -p backend-assets/grpc
+
+backend-assets/grpc/huggingface: backend-assets/grpc
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/huggingface
+endif
+
+backend/cpp/llama/llama.cpp:
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
+
+INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
+INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
+ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
+				 -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
+				 -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
+				 -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
+				 -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
+# Builds the grpc-server binary inside backend/cpp/$(VARIANT).
+# When BUILD_GRPC_FOR_BACKEND_LLAMA is defined, gRPC is first built through
+# backend/cpp/grpc and the variant build is pointed at that installation:
+# protoc and grpc_cpp_plugin from $(INSTALLED_PACKAGES)/bin, that bin directory
+# prepended to PATH, and the CMake package directories from ADDED_CMAKE_ARGS.
+# Otherwise the variant is built with whatever the environment already provides.
+build-llama-cpp-grpc-server:
+# Conditionally build grpc for the llama backend to use if needed
+ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
+	$(MAKE) -C backend/cpp/grpc build
+	_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/protoc \
+	_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
+	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
+	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) \
+	$(MAKE) -C backend/cpp/${VARIANT} grpc-server
+else
+	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
+endif
+
+# This target is for manually building a variant with auto-detected flags:
+# unlike the other llama-cpp-* variants, no explicit GGML_* options are added
+# to CMAKE_ARGS here.
+backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-cpp
+	$(MAKE) -C backend/cpp/llama-cpp purge
+	$(info ${GREEN}I llama-cpp build info:auto-detected${RESET})
+	$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
+
+backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-avx2
+	$(MAKE) -C backend/cpp/llama-avx2 purge
+	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
+
+backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-avx512
+	$(MAKE) -C backend/cpp/llama-avx512 purge
+	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
+
+backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-avx
+	$(MAKE) -C backend/cpp/llama-avx purge
+	$(info ${GREEN}I llama-cpp build info:avx${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
+
+backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-fallback
+	$(MAKE) -C backend/cpp/llama-fallback purge
+	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
+
+# CUDA variant: works on a private copy of backend/cpp/llama (llama-cuda),
+# purges it, then builds grpc-server with -DGGML_CUDA=ON. Of the CPU SIMD
+# options only GGML_AVX is on; AVX2, AVX512, FMA and F16C are switched off.
+backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-cuda
+	$(MAKE) -C backend/cpp/llama-cuda purge
+	$(info ${GREEN}I llama-cpp build info:cuda${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
+
+# hipblas variant: works on a private copy of backend/cpp/llama (llama-hipblas),
+# purges it, then builds grpc-server with all listed CPU SIMD options off and
+# BUILD_TYPE=hipblas exported to the sub-make.
+# NOTE(review): how BUILD_TYPE is consumed is decided in the backend Makefile, not shown here.
+backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-hipblas
+	$(MAKE) -C backend/cpp/llama-hipblas purge
+	$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
+
+backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
+	$(MAKE) -C backend/cpp/llama-sycl_f16 purge
+	$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
+	BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
+
+backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
+	$(MAKE) -C backend/cpp/llama-sycl_f32 purge
+	$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
+	BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
+
+# RPC-enabled variant: works on a private copy of backend/cpp/llama (llama-grpc),
+# purges it, then builds with -DGGML_RPC=ON and the listed CPU SIMD options off.
+# TARGET requests both the grpc-server and rpc-server build targets; only
+# grpc-server is copied into backend-assets/grpc by this rule.
+backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
+	cp -rf backend/cpp/llama backend/cpp/llama-grpc
+	$(MAKE) -C backend/cpp/llama-grpc purge
+	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
+
+# Copies the rpc-server binary from the llama-grpc variant's build tree into
+# backend-assets/util; depends on backend-assets/grpc/llama-cpp-grpc so that
+# build has run first.
+backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
+	mkdir -p backend-assets/util/
+	cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
+
+backend-assets/grpc/bark-cpp: backend/go/bark-cpp/libbark.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark-cpp/ LIBRARY_PATH=$(CURDIR)/backend/go/bark-cpp/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark-cpp/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/bark-cpp
+endif
+
+backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
+	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/piper
+endif
+
+backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/silero-vad
+endif
+
+backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/whisper
+endif
+
+backend-assets/grpc/local-store: backend-assets/grpc
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/local-store
+endif
+
+# Aggregate target with no recipe: requires `prepare` and every backend
+# target listed in $(GRPC_BACKENDS).
+grpcs: prepare $(GRPC_BACKENDS)
+
 DOCKER_IMAGE?=local-ai
 DOCKER_AIO_IMAGE?=local-ai-aio
 IMAGE_TYPE?=core
-BASE_IMAGE?=ubuntu:22.04
+BASE_IMAGE?=ubuntu:24.04

 docker:
 	docker build \
@@ -427,7 +852,7 @@ docker-aio-all:

 docker-image-intel:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:${ONEAPI_VERSION}.0-0-devel-ubuntu24.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -435,103 +860,12 @@ docker-image-intel:

 docker-image-intel-xpu:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:${ONEAPI_VERSION}.0-0-devel-ubuntu22.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.3-0-devel-ubuntu24.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .

-########################################################
-## Backends
-########################################################
-
-backend-images:
-	mkdir -p backend-images
-
-docker-build-llama-cpp:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg IMAGE_BASE=$(IMAGE_BASE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
-
-docker-build-bark-cpp:
-	docker build -t local-ai-backend:bark-cpp -f backend/Dockerfile.go --build-arg BACKEND=bark-cpp .
-
-docker-build-piper:
-	docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper .
-
-docker-build-local-store:
-	docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store .
-
-docker-build-huggingface:
-	docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface .
-
-docker-save-huggingface: backend-images
-	docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
-
-docker-save-local-store: backend-images
-	docker save local-ai-backend:local-store -o backend-images/local-store.tar
-
-docker-build-silero-vad:
-	docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad .
-
-docker-save-silero-vad: backend-images
-	docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
-
-docker-save-piper: backend-images
-	docker save local-ai-backend:piper -o backend-images/piper.tar
-
-docker-save-llama-cpp: backend-images
-	docker save local-ai-backend:llama-cpp -o backend-images/llama-cpp.tar
-
-docker-save-bark-cpp: backend-images
-	docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar
-
-docker-build-stablediffusion-ggml:
-	docker build -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.go --build-arg BACKEND=stablediffusion-ggml .
-
-docker-save-stablediffusion-ggml: backend-images
-	docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar
-
-docker-build-rerankers:
-	docker build -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
-
-docker-build-vllm:
-	docker build -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm .
-
-docker-build-transformers:
-	docker build -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
-
-docker-build-diffusers:
-	docker build -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers .
-
-docker-build-kokoro:
-	docker build -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
-
-docker-build-whisper:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.go --build-arg BACKEND=whisper  .
-
-docker-save-whisper: backend-images
-	docker save local-ai-backend:whisper -o backend-images/whisper.tar
-
-docker-build-faster-whisper:
-	docker build -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
-
-docker-build-coqui:
-	docker build -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui .
-
-docker-build-bark:
-	docker build -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
-
-docker-build-chatterbox:
-	docker build -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
-
-docker-build-exllama2:
-	docker build -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
-
-docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2
-
-########################################################
-### END Backends
-########################################################
-
 .PHONY: swagger
 swagger:
 	swag init -g core/http/app.go --output swagger
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 <h1 align="center">
  <br>
-  <img width="300" src="./core/http/static/logo.png"> <br>
+  <img height="300" src="./core/http/static/logo.png"> <br>
 <br>
 </h1>

--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,6 +1,5 @@
 embeddings: true
 name: text-embedding-ada-002
-backend: llama-cpp
 parameters:
  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf

--- a/aio/cpu/rerank.yaml
+++ b/aio/cpu/rerank.yaml
@@ -3,7 +3,7 @@ reranking: true
 f16: true
 parameters:
  model: jina-reranker-v1-tiny-en.f16.gguf
-backend: llama-cpp
+
 download_files:
  - filename: jina-reranker-v1-tiny-en.f16.gguf
    sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
--- a/aio/cpu/text-to-speech.yaml
+++ b/aio/cpu/text-to-speech.yaml
@@ -2,7 +2,7 @@ name: tts-1
 download_files:
  - filename: voice-en-us-amy-low.tar.gz
    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-backend: piper
+
 parameters:
  model: en-us-amy-low.onnx

--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,6 +1,5 @@
 context_size: 8192
 f16: true
-backend: llama-cpp
 function:
  grammar:
    no_mixed_free_string: true
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@@ -1,6 +1,5 @@
 context_size: 4096
 f16: true
-backend: llama-cpp
 mmap: true
 mmproj: minicpm-v-2_6-mmproj-f16.gguf
 name: gpt-4o
--- a/aio/gpu-8g/embeddings.yaml
+++ b/aio/gpu-8g/embeddings.yaml
@@ -1,6 +1,5 @@
 embeddings: true
 name: text-embedding-ada-002
-backend: llama-cpp
 parameters:
  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf

--- a/aio/gpu-8g/rerank.yaml
+++ b/aio/gpu-8g/rerank.yaml
@@ -3,7 +3,7 @@ reranking: true
 f16: true
 parameters:
  model: jina-reranker-v1-tiny-en.f16.gguf
-backend: llama-cpp
+
 download_files:
  - filename: jina-reranker-v1-tiny-en.f16.gguf
    sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
--- a/aio/gpu-8g/text-to-speech.yaml
+++ b/aio/gpu-8g/text-to-speech.yaml
@@ -2,7 +2,7 @@ name: tts-1
 download_files:
  - filename: voice-en-us-amy-low.tar.gz
    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-backend: piper
+
 parameters:
  model: en-us-amy-low.onnx

--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -1,6 +1,5 @@
 context_size: 4096
 f16: true
-backend: llama-cpp
 function:
  capture_llm_results:
  - (?s)<Thought>(.*?)</Thought>
--- a/aio/gpu-8g/vision.yaml
+++ b/aio/gpu-8g/vision.yaml
@@ -1,5 +1,4 @@
 context_size: 4096
-backend: llama-cpp
 f16: true
 mmap: true
 mmproj: minicpm-v-2_6-mmproj-f16.gguf
--- a/aio/intel/embeddings.yaml
+++ b/aio/intel/embeddings.yaml
@@ -1,6 +1,5 @@
 embeddings: true
 name: text-embedding-ada-002
-backend: llama-cpp
 parameters:
  model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf

--- a/aio/intel/rerank.yaml
+++ b/aio/intel/rerank.yaml
@@ -3,7 +3,7 @@ reranking: true
 f16: true
 parameters:
  model: jina-reranker-v1-tiny-en.f16.gguf
-backend: llama-cpp
+
 download_files:
  - filename: jina-reranker-v1-tiny-en.f16.gguf
    sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
--- a/aio/intel/text-to-speech.yaml
+++ b/aio/intel/text-to-speech.yaml
@@ -2,7 +2,7 @@ name: tts-1
 download_files:
  - filename: voice-en-us-amy-low.tar.gz
    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-backend: piper
+
 parameters:
  model: en-us-amy-low.onnx

--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -1,6 +1,5 @@
 context_size: 4096
 f16: true
-backend: llama-cpp
 function:
  capture_llm_results:
  - (?s)<Thought>(.*?)</Thought>
--- a/aio/intel/vision.yaml
+++ b/aio/intel/vision.yaml
@@ -1,5 +1,4 @@
 context_size: 4096
-backend: llama-cpp
 f16: true
 mmap: true
 mmproj: minicpm-v-2_6-mmproj-f16.gguf
--- a/assets.go
+++ b/assets.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+	rice "github.com/GeertJohan/go.rice"
+)
+
+// backendAssets is the go.rice box for the "backend-assets" directory.
+// It is assigned once, in init.
+var backendAssets *rice.Box
+
+// init looks up the "backend-assets" rice box and stores it in backendAssets.
+// It panics if the box cannot be found, so the program cannot start without it.
+func init() {
+	var err error
+	backendAssets, err = rice.FindBox("backend-assets")
+	if err != nil {
+		panic(err)
+	}
+}
--- a/backend/Dockerfile.go
+++ b/backend/Dockerfile.go
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04

 FROM ${BASE_IMAGE} AS builder
 ARG BACKEND=rerankers
@@ -17,9 +17,9 @@ ARG GO_VERSION=1.22.6
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
-        git ccache \
+        ccache \
        ca-certificates \
-        make cmake \
+        make \
        curl unzip \
        libssl-dev && \
    apt-get clean && \
@@ -96,17 +96,6 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
        ldconfig \
    ; fi

-# Intel oneAPI requirements
-RUN <<EOT bash
-    if [[ "${BUILD_TYPE}" == sycl* ]] && [ "${SKIP_DRIVERS}" = "false" ]; then
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            intel-oneapi-runtime-libs && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
 ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin:/usr/local/bin
@@ -134,9 +123,9 @@ EOT

 COPY . /LocalAI

-RUN cd /LocalAI && make protogen-go && make -C /LocalAI/backend/go/${BACKEND} build
+RUN cd /LocalAI && make backend-assets/grpc/${BACKEND}

 FROM scratch
-ARG BACKEND=rerankers

-COPY --from=builder /LocalAI/backend/go/${BACKEND}/package/. ./
+COPY --from=builder /LocalAI/backend-assets/grpc/${BACKEND} ./
+COPY --from=builder /LocalAI/backend/go/${BACKEND}/run.sh ./
--- a/backend/Dockerfile.llama-cpp
+++ b/backend/Dockerfile.llama-cpp
@@ -1,204 +0,0 @@
-ARG BASE_IMAGE=ubuntu:22.04
-ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
-
-
-# The grpc target does one thing, it builds and installs GRPC.  This is in it's own layer so that it can be effectively cached by CI.
-# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
-FROM ${GRPC_BASE_IMAGE} AS grpc
-
-# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
-ARG GRPC_MAKEFLAGS="-j4 -Otarget"
-ARG GRPC_VERSION=v1.65.0
-ARG CMAKE_FROM_SOURCE=false
-ARG CMAKE_VERSION=3.26.4
-ARG PROTOBUF_VERSION=v21.12
-
-ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
-
-WORKDIR /build
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ca-certificates \
-        build-essential curl libssl-dev \
-        git && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
-# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
-# and running make install in the target container
-RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-    mkdir -p /build/grpc/cmake/build && \
-    cd /build/grpc/cmake/build && \
-    sed -i "216i\  TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
-    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
-    make && \
-    make install && \
-    rm -rf /build
-
-RUN git clone --recurse-submodules --branch ${PROTOBUF_VERSION} https://github.com/protocolbuffers/protobuf.git && \
-    mkdir -p /build/protobuf/build && \
-    cd /build/protobuf/build && \
-    cmake -Dprotobuf_BUILD_SHARED_LIBS=ON -Dprotobuf_BUILD_TESTS=OFF .. && \
-    make && \
-    make install && \
-    rm -rf /build
-
-FROM ${BASE_IMAGE} AS builder
-ARG BACKEND=rerankers
-ARG BUILD_TYPE
-ENV BUILD_TYPE=${BUILD_TYPE}
-ARG CUDA_MAJOR_VERSION
-ARG CUDA_MINOR_VERSION
-ARG SKIP_DRIVERS=false
-ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
-ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
-ENV DEBIAN_FRONTEND=noninteractive
-ARG TARGETARCH
-ARG TARGETVARIANT
-ARG GO_VERSION=1.22.6
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        build-essential \
-        ccache git \
-        ca-certificates \
-        make \
-        curl unzip \
-        libssl-dev && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# Cuda
-ENV PATH=/usr/local/cuda/bin:${PATH}
-
-# HipBLAS requirements
-ENV PATH=/opt/rocm/bin:${PATH}
-
-# Vulkan requirements
-RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
-        apt-get update && \
-        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# CuBLAS requirements
-RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
-        apt-get update && \
-        apt-get install -y  --no-install-recommends \
-            software-properties-common pciutils
-        if [ "amd64" = "$TARGETARCH" ]; then
-            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-        fi
-        if [ "arm64" = "$TARGETARCH" ]; then
-            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
-        fi
-        dpkg -i cuda-keyring_1.1-1_all.deb && \
-        rm -f cuda-keyring_1.1-1_all.deb && \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-# If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            libclblast-dev && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* \
-    ; fi
-
-RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            hipblas-dev \
-            rocblas-dev && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/* && \
-        # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
-        # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
-        ldconfig \
-    ; fi
-
-RUN echo "TARGETARCH: $TARGETARCH"
-
-# We need protoc installed, and the version in 22.04 is too old.  We will create one as part installing the GRPC build below
-# but that will also being in a newer version of absl which stablediffusion cannot compile with.  This version of protoc is only
-# here so that we can generate the grpc code for the stablediffusion build
-RUN <<EOT bash
-    if [ "amd64" = "$TARGETARCH" ]; then
-        curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
-        unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-        rm protoc.zip
-    fi
-    if [ "arm64" = "$TARGETARCH" ]; then
-        curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
-        unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-        rm protoc.zip
-    fi
-EOT
-
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-COPY --from=grpc /opt/grpc /usr/local
-
-
-COPY . /LocalAI
-
-RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp
-RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-grpc
-RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-rpc-server
-
-# Copy libraries using a script to handle architecture differences
-RUN make -C /LocalAI/backend/cpp/llama-cpp package
-
-
-FROM scratch
-
-
-# Copy all available binaries (the build process only creates the appropriate ones for the target architecture)
-COPY --from=builder /LocalAI/backend/cpp/llama-cpp/package/. ./
--- a/backend/Dockerfile.python
+++ b/backend/Dockerfile.python
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04

 FROM ${BASE_IMAGE} AS builder
 ARG BACKEND=rerankers
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -185,6 +185,7 @@ message ModelOptions {
  string MainGPU = 13;
  string TensorSplit = 14;
  int32 Threads = 15;
+  string LibrarySearchPath = 16;
  float RopeFreqBase = 17;
  float RopeFreqScale = 18;
  float RMSNormEps = 19;
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,146 +0,0 @@
-
-LLAMA_VERSION?=acd6cb1c41676f6bbb25c2a76fa5abeb1719301e
-LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
-
-CMAKE_ARGS?=
-BUILD_TYPE?=
-NATIVE?=false
-ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
-TARGET?=--target grpc-server
-
-# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
-CMAKE_ARGS+=-DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON
-
-CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
-ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
-endif
-# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
-ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DGGML_CUDA=ON
-# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# to CMAKE_ARGS automatically
-else ifeq ($(BUILD_TYPE),openblas)
-	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
-else ifeq ($(BUILD_TYPE),clblas)
-	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
-# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ 
-else ifeq ($(BUILD_TYPE),hipblas)
-	ROCM_HOME ?= /opt/rocm
-	ROCM_PATH ?= /opt/rocm
-	export CXX=$(ROCM_HOME)/llvm/bin/clang++
-	export CC=$(ROCM_HOME)/llvm/bin/clang
-#	GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
-#	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-	CMAKE_ARGS+=-DGGML_HIP=ON
-#	CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
-else ifeq ($(BUILD_TYPE),vulkan)
-	CMAKE_ARGS+=-DGGML_VULKAN=1
-else ifeq ($(OS),Darwin)
-	ifeq ($(BUILD_TYPE),)
-		BUILD_TYPE=metal
-	endif
-	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
-	else
-		CMAKE_ARGS+=-DGGML_METAL=ON
-		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
-		CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
-		CMAKE_ARGS+=-DGGML_OPENMP=OFF
-	endif
-	TARGET+=--target ggml-metal
-endif
-
-ifeq ($(BUILD_TYPE),sycl_f16)
-	CMAKE_ARGS+=-DGGML_SYCL=ON \
-		-DCMAKE_C_COMPILER=icx \
-		-DCMAKE_CXX_COMPILER=icpx \
-		-DCMAKE_CXX_FLAGS="-fsycl" \
-		-DGGML_SYCL_F16=ON
-endif
-
-ifeq ($(BUILD_TYPE),sycl_f32)
-	CMAKE_ARGS+=-DGGML_SYCL=ON \
-		-DCMAKE_C_COMPILER=icx \
-		-DCMAKE_CXX_COMPILER=icpx \
-		-DCMAKE_CXX_FLAGS="-fsycl"
-endif
-
-INSTALLED_PACKAGES=$(CURDIR)/../grpc/installed_packages
-INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
-ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
-				 -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
-				 -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
-				 -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
-				 -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
-build-llama-cpp-grpc-server:
-# Conditionally build grpc for the llama backend to use if needed
-ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
-	$(MAKE) -C ../../grpc build
-	_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
-	_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
-	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
-	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
-	LLAMA_VERSION=$(LLAMA_VERSION) \
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
-else
-	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
-	LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
-endif
-
-llama-cpp: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build purge
-	$(info ${GREEN}I llama-cpp build info:${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS)" $(MAKE) VARIANT="llama-cpp-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build/grpc-server llama-cpp
-
-llama-cpp-grpc: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
-	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
-
-llama-cpp-rpc-server: llama-cpp-grpc
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
-
-llama.cpp:
-	mkdir -p llama.cpp
-	cd llama.cpp && \
-	git init && \
-	git remote add origin $(LLAMA_REPO)  && \
-	git fetch origin && \
-	git checkout -b build $(LLAMA_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-llama.cpp/tools/grpc-server: llama.cpp
-	mkdir -p llama.cpp/tools/grpc-server
-	bash prepare.sh
-
-rebuild:
-	bash prepare.sh
-	rm -rf grpc-server
-	$(MAKE) grpc-server
-
-package:
-	bash package.sh
-
-purge:
-	rm -rf llama.cpp/build
-	rm -rf llama.cpp/tools/grpc-server
-	rm -rf grpc-server
-
-clean: purge
-	rm -rf llama.cpp
-
-grpc-server: llama.cpp llama.cpp/tools/grpc-server
-	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	+bash -c "source $(ONEAPI_VARS); \
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
-else
-	+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
-endif
-	cp llama.cpp/build/bin/grpc-server .
--- a/backend/cpp/llama-cpp/package.sh
+++ b/backend/cpp/llama-cpp/package.sh
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-# Create lib directory
-mkdir -p $CURDIR/package/lib
-
-cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
-
-# Detect architecture and copy appropriate libraries
-if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
-    # x86_64 architecture
-    echo "Detected x86_64 architecture, copying x86_64 libraries..."
-    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
-    # ARM64 architecture
-    echo "Detected ARM64 architecture, copying ARM64 libraries..."
-    cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-else
-    echo "Error: Could not detect architecture"
-    exit 1
-fi
-
-echo "Packaging completed successfully" 
-ls -liah $CURDIR/package/
-ls -liah $CURDIR/package/lib/
--- a/backend/cpp/llama-cpp/run.sh
+++ b/backend/cpp/llama-cpp/run.sh
@@ -1,33 +0,0 @@
-#!/bin/bash
-set -ex
-
-# Get the absolute current dir where the script is located
-CURDIR=$(dirname "$(realpath $0)")
-
-cd /
-
-BINARY=llama-cpp
-
-## P2P/GRPC mode
-if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
-	if [ -e $CURDIR/llama-cpp-grpc ]; then
-		BINARY=llama-cpp-grpc
-	fi
-fi
- 
-# Extend ld library path with the dir where this script is located/lib
-if [ "$(uname)" == "Darwin" ]; then
-	DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
-else
-	export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
-fi
-
-# If there is a lib/ld.so, use it
-if [ -f $CURDIR/lib/ld.so ]; then
-	echo "Using lib/ld.so"
-	echo "Using binary: $BINARY"
-	exec $CURDIR/lib/ld.so $CURDIR/$BINARY "$@"
-fi
-
-echo "Using binary: $BINARY"
-exec $CURDIR/$BINARY "$@"
--- a/backend/cpp/llama-cpp/CMakeLists.txt
+++ b/backend/cpp/llama-cpp/CMakeLists.txt
@@ -17,8 +17,6 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
    include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
 endif()

-set(Protobuf_USE_STATIC_LIBS OFF)
-set(gRPC_USE_STATIC_LIBS OFF)
 find_package(absl CONFIG REQUIRED)
 find_package(Protobuf CONFIG REQUIRED)
 find_package(gRPC CONFIG REQUIRED)
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -0,0 +1,87 @@
+
+LLAMA_VERSION?=
+LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
+
+CMAKE_ARGS?=
+BUILD_TYPE?=
+ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
+TARGET?=--target grpc-server
+
+# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
+CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
+
+# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
+ifeq ($(BUILD_TYPE),cublas)
+	CMAKE_ARGS+=-DGGML_CUDA=ON
+# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+# to CMAKE_ARGS automatically
+else ifeq ($(BUILD_TYPE),openblas)
+	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
+else ifeq ($(BUILD_TYPE),clblas)
+	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
+# If it's hipblas we also have to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
+else ifeq ($(BUILD_TYPE),hipblas)
+	CMAKE_ARGS+=-DGGML_HIP=ON
+# If it's OSX with metal, enable metal and embed the metal library (-DGGML_METAL_EMBED_LIBRARY=ON)
+# But if it's OSX without metal, disable it here
+else ifeq ($(OS),Darwin)
+	ifneq ($(BUILD_TYPE),metal)
+		CMAKE_ARGS+=-DGGML_METAL=OFF
+	else
+		CMAKE_ARGS+=-DGGML_METAL=ON
+		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
+		TARGET+=--target ggml-metal
+	endif
+endif
+
+ifeq ($(BUILD_TYPE),sycl_f16)
+	CMAKE_ARGS+=-DGGML_SYCL=ON \
+		-DCMAKE_C_COMPILER=icx \
+		-DCMAKE_CXX_COMPILER=icpx \
+		-DCMAKE_CXX_FLAGS="-fsycl" \
+		-DGGML_SYCL_F16=ON
+endif
+
+ifeq ($(BUILD_TYPE),sycl_f32)
+	CMAKE_ARGS+=-DGGML_SYCL=ON \
+		-DCMAKE_C_COMPILER=icx \
+		-DCMAKE_CXX_COMPILER=icpx \
+		-DCMAKE_CXX_FLAGS="-fsycl"
+endif
+
+llama.cpp:
+	mkdir -p llama.cpp
+	cd llama.cpp && \
+	git init && \
+	git remote add origin $(LLAMA_REPO)  && \
+	git fetch origin && \
+	git checkout -b build $(LLAMA_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+llama.cpp/tools/grpc-server: llama.cpp
+	mkdir -p llama.cpp/tools/grpc-server
+	bash prepare.sh
+
+rebuild:
+	bash prepare.sh
+	rm -rf grpc-server
+	$(MAKE) grpc-server
+
+purge:
+	rm -rf llama.cpp/build
+	rm -rf llama.cpp/tools/grpc-server
+	rm -rf grpc-server
+
+clean: purge
+	rm -rf llama.cpp
+
+grpc-server: llama.cpp llama.cpp/tools/grpc-server
+	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+	+bash -c "source $(ONEAPI_VARS); \
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
+else
+	+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
+endif
+	cp llama.cpp/build/bin/grpc-server .
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
--- a/backend/cpp/llama-cpp/patches/01-llava.patch
+++ b/backend/cpp/llama-cpp/patches/01-llava.patch
--- a/backend/cpp/llama-cpp/prepare.sh
+++ b/backend/cpp/llama-cpp/prepare.sh
--- a/backend/go/bark-cpp/Makefile
+++ b/backend/go/bark-cpp/Makefile
@@ -3,49 +3,23 @@ LIBRARY_PATH := $(abspath ./)

 AR?=ar

-CMAKE_ARGS?=-DGGML_NATIVE=OFF
 BUILD_TYPE?=
-GOCMD=go
 # keep standard at C11 and C++11
-CXXFLAGS = -I. -I$(INCLUDE_PATH)/sources/bark.cpp/examples -I$(INCLUDE_PATH)/sources/bark.cpp/encodec.cpp/ggml/include -I$(INCLUDE_PATH)/sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
-LDFLAGS  = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/sources/bark.cpp/build/examples -lbark -lstdc++ -lm
-
-# bark.cpp
-BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
-BARKCPP_VERSION?=5d5be84f089ab9ea53b7a793f088d3fbf7247495
+CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
+LDFLAGS  = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm

 # warnings
 CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

-## bark.cpp
-sources/bark.cpp:
-	git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
-	cd sources/bark.cpp && \
-	git checkout $(BARKCPP_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/bark.cpp/build/libbark.a: sources/bark.cpp
-	cd sources/bark.cpp && \
-	mkdir -p build && \
-	cd build && \
-	cmake $(CMAKE_ARGS) .. && \
-	cmake --build . --config Release
-
 gobark.o:
 	$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)

-libbark.a: sources/bark.cpp/build/libbark.a gobark.o
-	cp $(INCLUDE_PATH)/sources/bark.cpp/build/libbark.a ./
+libbark.a: gobark.o
+	cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
 	$(AR) rcs libbark.a gobark.o
-
-bark-cpp: libbark.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH="$(CURDIR)" LIBRARY_PATH=$(CURDIR) \
-	$(GOCMD) build -v -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o bark-cpp ./
-
-package:
-	bash package.sh
-
-build: bark-cpp package
+	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
+	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
+	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o

 clean:
 	rm -f gobark.o libbark.a
--- a/backend/go/bark-cpp/gobark.go
+++ b/backend/go/bark-cpp/gobark.go
@@ -1,7 +1,7 @@
 package main

-// #cgo CXXFLAGS: -I${SRCDIR}/sources/bark.cpp/ -I${SRCDIR}/sources/bark.cpp/encodec.cpp -I${SRCDIR}/sources/bark.cpp/encodec.cpp/ggml/include -I${SRCDIR}/sources/bark.cpp/examples -I${SRCDIR}/sources/bark.cpp/spm-headers
-// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/sources/bark.cpp/build/examples -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ggml/src/ -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon -lggml -lgomp
+// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
+// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
 // #include <gobark.h>
 // #include <stdlib.h>
 import "C"
--- a/backend/go/bark-cpp/package.sh
+++ b/backend/go/bark-cpp/package.sh
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-# Create lib directory
-mkdir -p $CURDIR/package/lib
-cp -avrf $CURDIR/bark-cpp $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
-
-# Detect architecture and copy appropriate libraries
-if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
-    # x86_64 architecture
-    echo "Detected x86_64 architecture, copying x86_64 libraries..."
-    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
-    # ARM64 architecture
-    echo "Detected ARM64 architecture, copying ARM64 libraries..."
-    cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-else
-    echo "Error: Could not detect architecture"
-    exit 1
-fi
-
-echo "Packaging completed successfully" 
-ls -liah $CURDIR/package/
-ls -liah $CURDIR/package/lib/
--- a/backend/go/bark-cpp/run.sh
+++ b/backend/go/bark-cpp/run.sh
@@ -1,13 +1,3 @@
 #!/bin/bash
 set -ex
-
-CURDIR=$(dirname "$(realpath $0)")
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
-
-# If there is a lib/ld.so, use it
-if [ -f $CURDIR/lib/ld.so ]; then
-	echo "Using lib/ld.so"
-	exec $CURDIR/lib/ld.so $CURDIR/bark-cpp "$@"
-fi
-
-exec $CURDIR/bark-cpp "$@"
+exec ./bark-cpp
--- a/backend/go/huggingface/Makefile
+++ b/backend/go/huggingface/Makefile
@@ -1,9 +0,0 @@
-GOCMD=go
-
-huggingface:
-	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./
-
-package:
-	bash package.sh
-
-build: huggingface package
--- a/backend/go/huggingface/package.sh
+++ b/backend/go/huggingface/package.sh
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-mkdir -p $CURDIR/package
-cp -avrf $CURDIR/huggingface $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
--- a/backend/go/huggingface/run.sh
+++ b/backend/go/huggingface/run.sh
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -ex
-
-CURDIR=$(dirname "$(realpath $0)")
-
-exec $CURDIR/huggingface "$@"
--- a/backend/go/image/stablediffusion-ggml/Makefile
+++ b/backend/go/image/stablediffusion-ggml/Makefile
@@ -4,11 +4,9 @@ LIBRARY_PATH := $(abspath ./)
 AR?=ar
 CMAKE_ARGS?=
 BUILD_TYPE?=
-NATIVE?=false
-CUDA_LIBPATH?=/usr/local/cuda/lib64/
 ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
 # keep standard at C11 and C++11
-CXXFLAGS = -I. -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
+CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC

 GOCMD?=go
 CGO_LDFLAGS?=
@@ -17,21 +15,12 @@ CGO_LDFLAGS_SYCL=
 GO_TAGS?=
 LD_FLAGS?=

-# stablediffusion.cpp (ggml)
-STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
-
 # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

-ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
-endif
-
 # If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DSD_CUDA=ON -DGGML_CUDA=ON
-	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
+	CMAKE_ARGS+=-DSD_CUDA=ON
 # If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
 # to CMAKE_ARGS automatically
 else ifeq ($(BUILD_TYPE),openblas)
@@ -41,17 +30,14 @@ else ifeq ($(BUILD_TYPE),clblas)
 	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
 else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON
+	CMAKE_ARGS+=-DSD_HIPBLAS=ON
 # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
-else ifeq ($(BUILD_TYPE),vulkan)
-	CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON
-	CGO_LDFLAGS+=-lvulkan
 else ifeq ($(OS),Darwin)
 	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DSD_METAL=OFF -DGGML_METAL=OFF
+		CMAKE_ARGS+=-DSD_METAL=OFF
 	else
-		CMAKE_ARGS+=-DSD_METAL=ON -DGGML_METAL=ON
+		CMAKE_ARGS+=-DSD_METAL=ON
 		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
 		TARGET+=--target ggml-metal
 	endif
@@ -63,8 +49,8 @@ ifeq ($(BUILD_TYPE),sycl_f16)
 		-DCMAKE_CXX_COMPILER=icpx \
 		-DSD_SYCL=ON \
 		-DGGML_SYCL_F16=ON
-	export CC=icx
-	export CXX=icpx
+	CC=icx
+	CXX=icpx
 	CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
 	CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
 	CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
@@ -76,8 +62,8 @@ ifeq ($(BUILD_TYPE),sycl_f32)
 		-DCMAKE_C_COMPILER=icx \
 		-DCMAKE_CXX_COMPILER=icpx \
 		-DSD_SYCL=ON
-	export CC=icx
-	export CXX=icpx
+	CC=icx
+	CXX=icpx
 	CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
 	CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
 	CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
@@ -115,12 +101,12 @@ ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	+bash -c "source $(ONEAPI_VARS); \
 	mkdir -p build && \
 	cd build && \
-	cmake $(CMAKE_ARGS) ../sources/stablediffusion-ggml.cpp && \
+	cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
 	cmake --build . --config Release"
 else
 	mkdir -p build && \
 	cd build && \
-	cmake $(CMAKE_ARGS) ../sources/stablediffusion-ggml.cpp && \
+	cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
 	cmake --build . --config Release
 endif
 	$(MAKE) $(COMBINED_LIB)
@@ -133,26 +119,17 @@ else
 	$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
 endif

-## stablediffusion (ggml)
-sources/stablediffusion-ggml.cpp:
-	git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
-	cd sources/stablediffusion-ggml.cpp && \
-	git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-libsd.a: sources/stablediffusion-ggml.cpp build/libstable-diffusion.a gosd.o
+libsd.a: gosd.o
 	cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
 	$(AR) rcs libsd.a gosd.o

-stablediffusion-ggml: libsd.a
+stablediffusion-ggml:
 	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
 	CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o stablediffusion-ggml ./
-
-package:
-	bash package.sh
-
-build: stablediffusion-ggml package
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
+ifneq ($(UPX),)
+	$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
+endif

 clean:
 	rm -rf gosd.o libsd.a build $(COMBINED_LIB)
--- a/backend/go/image/stablediffusion-ggml/gosd.cpp
+++ b/backend/go/image/stablediffusion-ggml/gosd.cpp
--- a/backend/go/image/stablediffusion-ggml/gosd.go
+++ b/backend/go/image/stablediffusion-ggml/gosd.go
@@ -1,6 +1,6 @@
 package main

-// #cgo CXXFLAGS: -I${SRCDIR}/sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/sources/stablediffusion-ggml.cpp -I${SRCDIR}/sources/stablediffusion-ggml.cpp/ggml/include
+// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
 // #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
 // #include <gosd.h>
 // #include <stdlib.h>
--- a/backend/go/image/stablediffusion-ggml/gosd.h
+++ b/backend/go/image/stablediffusion-ggml/gosd.h
--- a/backend/go/image/stablediffusion-ggml/main.go
+++ b/backend/go/image/stablediffusion-ggml/main.go
--- a/backend/go/llm/langchain/langchain.go
+++ b/backend/go/llm/langchain/langchain.go
--- a/backend/go/llm/langchain/main.go
+++ b/backend/go/llm/langchain/main.go
--- a/backend/go/local-store/Makefile
+++ b/backend/go/local-store/Makefile
@@ -1,9 +0,0 @@
-GOCMD=go
-
-local-store:
-	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o local-store ./
-
-package:
-	bash package.sh
-
-build: local-store package
--- a/backend/go/local-store/package.sh
+++ b/backend/go/local-store/package.sh
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-mkdir -p $CURDIR/package
-cp -avrf $CURDIR/local-store $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
--- a/backend/go/local-store/run.sh
+++ b/backend/go/local-store/run.sh
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -ex
-
-CURDIR=$(dirname "$(realpath $0)")
-
-exec $CURDIR/local-store "$@"
--- a/backend/go/piper/Makefile
+++ b/backend/go/piper/Makefile
@@ -1,37 +0,0 @@
-
-# go-piper version
-PIPER_REPO?=https://github.com/mudler/go-piper
-PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
-
-CURRENT_DIR=$(abspath ./)
-GOCMD=go
-
-PIPER_CGO_CXXFLAGS+=-I$(CURRENT_DIR)/sources/go-piper/piper/src/cpp -I$(CURRENT_DIR)/sources/go-piper/piper/build/fi/include -I$(CURRENT_DIR)/sources/go-piper/piper/build/pi/include -I$(CURRENT_DIR)/sources/go-piper/piper/build/si/include
-PIPER_CGO_LDFLAGS+=-L$(CURRENT_DIR)/sources/go-piper/piper/build/fi/lib -L$(CURRENT_DIR)/sources/go-piper/piper/build/pi/lib -L$(CURRENT_DIR)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
-
-## go-piper
-sources/go-piper:
-	mkdir -p sources/go-piper
-	cd sources/go-piper && \
-	git init && \
-	git remote add origin $(PIPER_REPO) && \
-	git fetch origin && \
-	git checkout $(PIPER_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/go-piper/libpiper_binding.a: sources/go-piper
-	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
-
-espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
-	mkdir -p espeak-ng-data
-	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. espeak-ng-data
-
-piper: sources/go-piper sources/go-piper/libpiper_binding.a espeak-ng-data
-	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURRENT_DIR)/sources/go-piper
-	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURRENT_DIR)/sources/go-piper \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o piper ./
-
-package:
-	bash package.sh
-
-build: piper package
--- a/backend/go/piper/package.sh
+++ b/backend/go/piper/package.sh
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-# Create lib directory
-mkdir -p $CURDIR/package/lib
-
-cp -avrf $CURDIR/piper $CURDIR/package/
-cp -avrf $CURDIR/espeak-ng-data $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
-cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/
-
-# Detect architecture and copy appropriate libraries
-if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
-    # x86_64 architecture
-    echo "Detected x86_64 architecture, copying x86_64 libraries..."
-    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
-    cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
-    # ARM64 architecture
-    echo "Detected ARM64 architecture, copying ARM64 libraries..."
-    cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
-    cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-else
-    echo "Error: Could not detect architecture"
-    exit 1
-fi
-
-echo "Packaging completed successfully" 
-ls -liah $CURDIR/package/
-ls -liah $CURDIR/package/lib/
--- a/backend/go/piper/run.sh
+++ b/backend/go/piper/run.sh
@@ -1,15 +0,0 @@
-#!/bin/bash
-set -ex
-
-CURDIR=$(dirname "$(realpath $0)")
-
-export ESPEAK_NG_DATA=$CURDIR/espeak-ng-data
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
-
-# If there is a lib/ld.so, use it
-if [ -f $CURDIR/lib/ld.so ]; then
-	echo "Using lib/ld.so"
-	exec $CURDIR/lib/ld.so $CURDIR/piper "$@"
-fi
-
-exec $CURDIR/piper "$@"
--- a/backend/go/silero-vad/Makefile
+++ b/backend/go/silero-vad/Makefile
@@ -1,47 +0,0 @@
-
-CURRENT_DIR=$(abspath ./)
-GOCMD=go
-
-ONNX_VERSION?=1.20.0
-ONNX_ARCH?=x64
-ONNX_OS?=linux
-
-# Detect if we are running on arm64
-ifneq (,$(findstring aarch64,$(shell uname -m)))
-	ONNX_ARCH=aarch64
-endif
-
-ifeq ($(OS),Darwin)
-	ONNX_OS=osx
-	ifneq (,$(findstring aarch64,$(shell uname -m)))
-		ONNX_ARCH=arm64
-	else ifneq (,$(findstring arm64,$(shell uname -m)))
-		ONNX_ARCH=arm64
-	else
-		ONNX_ARCH=x86_64
-	endif
-endif
-
-sources/onnxruntime:
-	mkdir -p sources/onnxruntime
-	curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-	cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-	cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
-
-backend-assets/lib/libonnxruntime.so.1: sources/onnxruntime
-	mkdir -p backend-assets/lib
-	cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/
-ifeq ($(OS),Darwin)
-	mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
-else
-	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
-endif
-
-silero-vad: backend-assets/lib/libonnxruntime.so.1
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURRENT_DIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURRENT_DIR)/backend-assets/lib \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o silero-vad ./
-
-package:
-	bash package.sh
-
-build: silero-vad package
--- a/backend/go/silero-vad/package.sh
+++ b/backend/go/silero-vad/package.sh
@@ -1,53 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-# Create lib directory
-mkdir -p $CURDIR/package/lib
-
-cp -avrf $CURDIR/silero-vad $CURDIR/package/
-cp -avrf $CURDIR/run.sh $CURDIR/package/
-cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
-
-# Detect architecture and copy appropriate libraries
-if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
-    # x86_64 architecture
-    echo "Detected x86_64 architecture, copying x86_64 libraries..."
-    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
-    cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
-    # ARM64 architecture
-    echo "Detected ARM64 architecture, copying ARM64 libraries..."
-    cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
-    cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-else
-    echo "Error: Could not detect architecture"
-    exit 1
-fi
-
-echo "Packaging completed successfully" 
-ls -liah $CURDIR/package/
-ls -liah $CURDIR/package/lib/
--- a/backend/go/silero-vad/run.sh
+++ b/backend/go/silero-vad/run.sh
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -ex
-
-CURDIR=$(dirname "$(realpath $0)")
-
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
-
-# If there is a lib/ld.so, use it
-if [ -f $CURDIR/lib/ld.so ]; then
-	echo "Using lib/ld.so"
-	exec $CURDIR/lib/ld.so $CURDIR/silero-vad "$@"
-fi
-
-exec $CURDIR/silero-vad "$@"
--- a/backend/go/stablediffusion-ggml/package.sh
+++ b/backend/go/stablediffusion-ggml/package.sh
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-# Create lib directory
-mkdir -p $CURDIR/package/lib
-
-cp -avrf $CURDIR/stablediffusion-ggml $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
-
-# Detect architecture and copy appropriate libraries
-if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
-    # x86_64 architecture
-    echo "Detected x86_64 architecture, copying x86_64 libraries..."
-    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
-    cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
-    # ARM64 architecture
-    echo "Detected ARM64 architecture, copying ARM64 libraries..."
-    cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
-    cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-else
-    echo "Error: Could not detect architecture"
-    exit 1
-fi
-
-echo "Packaging completed successfully" 
-ls -liah $CURDIR/package/
-ls -liah $CURDIR/package/lib/
--- a/backend/go/stablediffusion-ggml/run.sh
+++ b/backend/go/stablediffusion-ggml/run.sh
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -ex
-
-CURDIR=$(dirname "$(realpath $0)")
-
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
-
-# If there is a lib/ld.so, use it
-if [ -f $CURDIR/lib/ld.so ]; then
-	echo "Using lib/ld.so"
-	exec $CURDIR/lib/ld.so $CURDIR/stablediffusion-ggml "$@"
-fi
-
-exec $CURDIR/stablediffusion-ggml "$@"
--- a/backend/go/local-store/debug.go
+++ b/backend/go/local-store/debug.go
--- a/backend/go/local-store/main.go
+++ b/backend/go/local-store/main.go
--- a/backend/go/local-store/production.go
+++ b/backend/go/local-store/production.go
--- a/backend/go/local-store/store.go
+++ b/backend/go/local-store/store.go
@@ -4,7 +4,6 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"container/heap"
-	"errors"
 	"fmt"
 	"math"
 	"slices"
@@ -100,9 +99,6 @@ func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
 }

 func (s *Store) Load(opts *pb.ModelOptions) error {
-	if opts.Model != "" {
-		return errors.New("not implemented")
-	}
 	return nil
 }

@@ -319,7 +315,7 @@ func isNormalized(k []float32) bool {

 	for _, v := range k {
 		v64 := float64(v)
-		sum += v64 * v64
+		sum += v64*v64
 	}

 	s := math.Sqrt(sum)
--- a/backend/go/transcribe/whisper/main.go
+++ b/backend/go/transcribe/whisper/main.go
--- a/backend/go/transcribe/whisper/whisper.go
+++ b/backend/go/transcribe/whisper/whisper.go
--- a/backend/go/piper/main.go
+++ b/backend/go/piper/main.go
--- a/backend/go/piper/piper.go
+++ b/backend/go/piper/piper.go
@@ -23,7 +23,7 @@ func (sd *Piper) Load(opts *pb.ModelOptions) error {
 	}
 	var err error
 	// Note: the Model here is a path to a directory containing the model files
-	sd.piper, err = New(os.Getenv("ESPEAK_NG_DATA"))
+	sd.piper, err = New(opts.LibrarySearchPath)
 	return err
 }

--- a/backend/go/vad/silero/main.go
+++ b/backend/go/vad/silero/main.go
--- a/backend/go/vad/silero/vad.go
+++ b/backend/go/vad/silero/vad.go
--- a/backend/go/whisper/Makefile
+++ b/backend/go/whisper/Makefile
@@ -1,131 +0,0 @@
-GOCMD=go
-NATIVE?=false
-
-BUILD_TYPE?=
-CMAKE_ARGS?=
-
-# whisper.cpp version
-WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
-WHISPER_CPP_VERSION?=1f5cf0b2888402d57bb17b2029b2caa97e5f3baf
-
-export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
-export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
-export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
-export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
-
-CGO_LDFLAGS_WHISPER?=
-CGO_LDFLAGS_WHISPER+=-lggml
-CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
-CUDA_LIBPATH?=/usr/local/cuda/lib64/
-
-ONEAPI_VERSION?=2025.2
-
-# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
-ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
-	WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
-endif
-CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
-ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
-endif
-# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
-ifeq ($(BUILD_TYPE),cublas)
-	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
-	CMAKE_ARGS+=-DGGML_CUDA=ON
-	CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
-	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
-# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# to CMAKE_ARGS automatically
-else ifeq ($(BUILD_TYPE),openblas)
-	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
-else ifeq ($(BUILD_TYPE),clblas)
-	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
-# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
-else ifeq ($(BUILD_TYPE),hipblas)
-	ROCM_HOME ?= /opt/rocm
-	ROCM_PATH ?= /opt/rocm
-	LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
-	export STABLE_BUILD_TYPE=
-	export CXX=$(ROCM_HOME)/llvm/bin/clang++
-	export CC=$(ROCM_HOME)/llvm/bin/clang
-#	GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
-#	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-	CMAKE_ARGS+=-DGGML_HIP=ON
-	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib -L$(CURRENT_MAKEFILE_DIR)/sources/whisper.cpp/build/ggml/src/ggml-hip/ -lggml-hip
-#	CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
-else ifeq ($(BUILD_TYPE),vulkan)
-	CMAKE_ARGS+=-DGGML_VULKAN=1
-	CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
-	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
-else ifeq ($(OS),Darwin)
-	ifeq ($(BUILD_TYPE),)
-		BUILD_TYPE=metal
-	endif
-	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
-		CGO_LDFLAGS_WHISPER+=-lggml-blas
-		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
-	else
-		CMAKE_ARGS+=-DGGML_METAL=ON
-		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
-		CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
-		CMAKE_ARGS+=-DGGML_OPENMP=OFF
-		CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
-		CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
-		CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
-		CGO_LDFLAGS += -framework Accelerate
-		CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
-		export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
-	endif
-	TARGET+=--target ggml-metal
-endif
-
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	export CC=icx
-	export CXX=icpx
-	CGO_LDFLAGS_WHISPER += -fsycl -L${DNNLROOT}/lib -rpath ${ONEAPI_ROOT}/${ONEAPI_VERSION}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL -lggml-sycl
-	CGO_LDFLAGS_WHISPER += $(shell pkg-config --libs mkl-static-lp64-gomp)
-	CGO_CXXFLAGS_WHISPER += -fiopenmp -fopenmp-targets=spir64
-	CGO_CXXFLAGS_WHISPER += $(shell pkg-config --cflags mkl-static-lp64-gomp )
-	export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-sycl/
-	CMAKE_ARGS+=-DGGML_SYCL=ON \
-		-DCMAKE_C_COMPILER=icx \
-		-DCMAKE_CXX_COMPILER=icpx \
-		-DCMAKE_CXX_FLAGS="-fsycl"
-endif
-
-ifeq ($(BUILD_TYPE),sycl_f16)
-	CMAKE_ARGS+=-DGGML_SYCL_F16=ON
-endif
-
-ifneq ($(OS),Darwin)
-	CGO_LDFLAGS_WHISPER+=-lgomp
-endif
-
-## whisper
-sources/whisper.cpp:
-	mkdir -p sources/whisper.cpp
-	cd sources/whisper.cpp && \
-	git init && \
-	git remote add origin $(WHISPER_REPO) && \
-	git fetch origin && \
-	git checkout $(WHISPER_CPP_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
-	cd sources/whisper.cpp && cmake $(CMAKE_ARGS) $(WHISPER_CMAKE_ARGS) . -B ./build
-	cd sources/whisper.cpp/build && cmake --build . --config Release
-
-whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a
-	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
-	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
-	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
-	CGO_CXXFLAGS="$(CGO_CXXFLAGS_WHISPER)" \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o whisper ./
-
-package:
-	bash package.sh
-
-build: whisper package
--- a/backend/go/whisper/package.sh
+++ b/backend/go/whisper/package.sh
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-# Script to copy the appropriate libraries based on architecture
-# This script is used in the final stage of the Dockerfile
-
-set -e
-
-CURDIR=$(dirname "$(realpath $0)")
-
-# Create lib directory
-mkdir -p $CURDIR/package/lib
-
-cp -avrf $CURDIR/whisper $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
-
-# Detect architecture and copy appropriate libraries
-if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
-    # x86_64 architecture
-    echo "Detected x86_64 architecture, copying x86_64 libraries..."
-    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
-    cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
-    cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
-    # ARM64 architecture
-    echo "Detected ARM64 architecture, copying ARM64 libraries..."
-    cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
-    cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
-    cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
-    cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
-    cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
-else
-    echo "Error: Could not detect architecture"
-    exit 1
-fi
-
-echo "Packaging completed successfully" 
-ls -liah $CURDIR/package/
-ls -liah $CURDIR/package/lib/
--- a/backend/go/whisper/run.sh
+++ b/backend/go/whisper/run.sh
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -ex
-
-CURDIR=$(dirname "$(realpath $0)")
-
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
-
-# If there is a lib/ld.so, use it
-if [ -f $CURDIR/lib/ld.so ]; then
-	echo "Using lib/ld.so"
-	exec $CURDIR/lib/ld.so $CURDIR/whisper "$@"
-fi
-
-exec $CURDIR/whisper "$@"
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -1,78 +1,5 @@
 ---
-## metas
- &llamacpp
-  name: "llama-cpp"
-  alias: "llama-cpp"
-  license: mit
-  icon: https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png
-  description: |
-    LLM inference in C/C++
-  urls:
-    - https://github.com/ggerganov/llama.cpp
-  tags:
-    - text-to-text
-    - LLM
-    - CPU
-    - GPU
-    - Metal
-    - CUDA
-    - HIP
-  capabilities:
-    default: "cpu-llama-cpp"
-    nvidia: "cuda12-llama-cpp"
-    intel: "intel-sycl-f16-llama-cpp"
-    amd: "rocm-llama-cpp"
-    metal: "metal-llama-cpp"
-    vulkan: "vulkan-llama-cpp"
-    nvidia-l4t: "nvidia-l4t-arm64-llama-cpp"
-    darwin-x86: "darwin-x86-llama-cpp"
- &whispercpp
-  name: "whisper"
-  alias: "whisper"
-  license: mit
-  icon: https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg
-  description: |
-    Port of OpenAI's Whisper model in C/C++
-  urls:
-    - https://github.com/ggml-org/whisper.cpp
-  tags:
-    - audio-transcription
-    - CPU
-    - GPU
-    - CUDA
-    - HIP
-  capabilities:
-    default: "cpu-whisper"
-    nvidia: "cuda12-whisper"
-    intel: "intel-sycl-f16-whisper"
-    amd: "rocm-whisper"
-    vulkan: "vulkan-whisper"
-    nvidia-l4t: "nvidia-l4t-arm64-whisper"
- &stablediffusionggml
-  name: "stablediffusion-ggml"
-  alias: "stablediffusion-ggml"
-  license: mit
-  icon: https://github.com/leejet/stable-diffusion.cpp/raw/master/assets/cat_with_sd_cpp_42.png
-  description: |
-     Stable Diffusion and Flux in pure C/C++
-  urls:
-    - https://github.com/leejet/stable-diffusion.cpp
-  tags:
-    - image-generation
-    - CPU
-    - GPU
-    - Metal
-    - CUDA
-    - HIP
-  capabilities:
-    default: "cpu-stablediffusion-ggml"
-    nvidia: "cuda12-stablediffusion-ggml"
-    intel: "intel-sycl-f16-stablediffusion-ggml"
-    # amd: "rocm-stablediffusion-ggml"
-    vulkan: "vulkan-stablediffusion-ggml"
-    nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
-    # metal: "metal-stablediffusion-ggml"
-    # darwin-x86: "darwin-x86-stablediffusion-ggml"
+## vLLM
 - &vllm
  name: "vllm"
  license: apache-2.0
@@ -105,404 +32,15 @@
    nvidia: "cuda12-vllm"
    amd: "rocm-vllm"
    intel: "intel-sycl-f16-vllm"
- &rerankers
-  name: "rerankers"
-  alias: "rerankers"
-  capabilities:
-    nvidia: "cuda12-rerankers"
-    intel: "intel-sycl-f16-rerankers"
-    amd: "rocm-rerankers"
- &transformers
-  name: "transformers"
-  icon: https://camo.githubusercontent.com/26569a27b8a30a488dd345024b71dbc05da7ff1b2ba97bb6080c9f1ee0f26cc7/68747470733a2f2f68756767696e67666163652e636f2f64617461736574732f68756767696e67666163652f646f63756d656e746174696f6e2d696d616765732f7265736f6c76652f6d61696e2f7472616e73666f726d6572732f7472616e73666f726d6572735f61735f615f6d6f64656c5f646566696e6974696f6e2e706e67
-  alias: "transformers"
-  license: apache-2.0
-  description: |
-    Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer vision, audio, video, and multimodal model, for both inference and training.
-    It centralizes the model definition so that this definition is agreed upon across the ecosystem. transformers is the pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...), and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from transformers.
-  urls:
-    - https://github.com/huggingface/transformers
-  tags:
-    - text-to-text
-    - multimodal
-  capabilities:
-    nvidia: "cuda12-transformers"
-    intel: "intel-sycl-f16-transformers"
-    amd: "rocm-transformers"
- &diffusers
-  name: "diffusers"
-  icon: https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg
-  description: |
-    🤗 Diffusers is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Whether you're looking for a simple inference solution or training your own diffusion models, 🤗 Diffusers is a modular toolbox that supports both.
-  urls:
-    - https://github.com/huggingface/diffusers
-  tags:
-    - image-generation
-    - video-generation
-    - diffusion-models
-  license: apache-2.0
-  alias: "diffusers"
-  capabilities:
-    nvidia: "cuda12-diffusers"
-    intel: "intel-sycl-f32-diffusers"
-    amd: "rocm-diffusers"
- &exllama2
-  name: "exllama2"
-  urls:
-    - https://github.com/turboderp-org/exllamav2
-  tags:
-    - text-to-text
-    - LLM
-    - EXL2
-  license: MIT
-  description: |
-    ExLlamaV2 is an inference library for running local LLMs on modern consumer GPUs.
-  alias: "exllama2"
-  capabilities:
-    nvidia: "cuda12-exllama2"
-    intel: "intel-sycl-f32-exllama2"
-    amd: "rocm-exllama2"
- &faster-whisper
-  icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
-  description: |
-    faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models.
-    This implementation is up to 4 times faster than openai/whisper for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU.
-  urls:
-    - https://github.com/SYSTRAN/faster-whisper
-  tags:
-    - speech-to-text
-    - Whisper
-  license: MIT
-  name: "faster-whisper"
-  capabilities:
-    nvidia: "cuda12-faster-whisper"
-    intel: "intel-sycl-f32-faster-whisper"
-    amd: "rocm-faster-whisper"
- &kokoro
-  icon: https://avatars.githubusercontent.com/u/166769057?v=4
-  description: |
-    Kokoro is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient. With Apache-licensed weights, Kokoro can be deployed anywhere from production environments to personal projects.
-  urls:
-    - https://huggingface.co/hexgrad/Kokoro-82M
-    - https://github.com/hexgrad/kokoro
-  tags:
-    - text-to-speech
-    - TTS
-    - LLM
-  license: apache-2.0
-  alias: "kokoro"
-  name: "kokoro"
-  capabilities:
-    nvidia: "cuda12-kokoro"
-    intel: "intel-sycl-f32-kokoro"
-    amd: "rocm-kokoro"
- &coqui
-  urls:
-    - https://github.com/idiap/coqui-ai-TTS
-  description: |
-    🐸 Coqui TTS is a library for advanced Text-to-Speech generation.
-
-    🚀 Pretrained models in +1100 languages.
-
-    🛠️ Tools for training new models and fine-tuning existing models in any language.
-
-    📚 Utilities for dataset analysis and curation.
-  tags:
-    - text-to-speech
-    - TTS
-  license: mpl-2.0
-  name: "coqui"
-  alias: "coqui"
-  capabilities:
-    nvidia: "cuda12-coqui"
-    intel: "intel-sycl-f32-coqui"
-    amd: "rocm-coqui"
-  icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
- &bark
-  urls:
-    - https://github.com/suno-ai/bark
-  description: |
-    Bark is a transformer-based text-to-audio model created by Suno. Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. The model can also produce nonverbal communications like laughing, sighing and crying. To support the research community, we are providing access to pretrained model checkpoints, which are ready for inference and available for commercial use.
-  tags:
-    - text-to-speech
-    - TTS
-  license: MIT
-  name: "bark"
-  alias: "bark"
-  capabilities:
-    cuda: "cuda12-bark"
-    intel: "intel-sycl-f32-bark"
-    rocm: "rocm-bark"
-  icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4
- &barkcpp
-  urls:
-    - https://github.com/PABannier/bark.cpp
-  description: |
-      With bark.cpp, our goal is to bring real-time realistic multilingual text-to-speech generation to the community.
-
-      Plain C/C++ implementation without dependencies
-      AVX, AVX2 and AVX512 for x86 architectures
-      CPU and GPU compatible backends
-      Mixed F16 / F32 precision
-      4-bit, 5-bit and 8-bit integer quantization
-      Metal and CUDA backends
-
-      Models supported
-
-          Bark Small
-          Bark Large
-  tags:
-    - text-to-speech
-    - TTS
-  license: MIT
-  icon: https://github.com/PABannier/bark.cpp/raw/main/assets/banner.png
-  name: "bark-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-bark-cpp"
-  alias: "bark-cpp"
- &chatterbox
-  urls:
-    - https://github.com/resemble-ai/chatterbox
-  description: |
-    Resemble AI's first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.
-    Whether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out.
-  tags:
-    - text-to-speech
-    - TTS
-  license: MIT
-  icon: https://private-user-images.githubusercontent.com/660224/448166653-bd8c5f03-e91d-4ee5-b680-57355da204d1.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTAxOTE0MDAsIm5iZiI6MTc1MDE5MTEwMCwicGF0aCI6Ii82NjAyMjQvNDQ4MTY2NjUzLWJkOGM1ZjAzLWU5MWQtNGVlNS1iNjgwLTU3MzU1ZGEyMDRkMS5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjUwNjE3JTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI1MDYxN1QyMDExNDBaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT1hMmI1NGY3OGFiZTlhNGFkNTVlYTY4NTIwMWEzODRiZGE4YzdhNGQ5MGNhNzE3MDYyYTA2NDIxYTkyYzhiODkwJlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.mR9kM9xX0TdzPuSpuspCllHYQiq79dFQ2rtuNvjrl6w
-  name: "chatterbox"
-  capabilities:
-    nvidia: "cuda12-chatterbox"
- &piper
-  name: "piper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
-  icon: https://github.com/OHF-Voice/piper1-gpl/raw/main/etc/logo.png
-  urls:
-    - https://github.com/rhasspy/piper
-    - https://github.com/mudler/go-piper
-  license: MIT
-  description: |
-     A fast, local neural text to speech system
-  tags:
-    - text-to-speech
-    - TTS
- &silero-vad
-  name: "silero-vad"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-silero-vad"
-  icon: https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png
-  urls:
-    - https://github.com/snakers4/silero-vad
-  description: |
-    Silero VAD: pre-trained enterprise-grade Voice Activity Detector.
-    Silero VAD is a voice activity detection model that can be used to detect whether a given audio contains speech or not.
-  tags:
-    - voice-activity-detection
-    - VAD
-    - silero-vad
-    - CPU
- &local-store
-  name: "local-store"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-local-store"
-  urls:
-    - https://github.com/mudler/LocalAI
-  description: |
-    Local Store is a local-first, self-hosted, and open-source vector database.
-  tags:
-    - vector-database
-    - local-first
-    - open-source
-    - CPU
-  license: MIT
- &huggingface
-  name: "huggingface"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface"
-  icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
-  urls:
-    - https://huggingface.co/docs/hub/en/api
-  description: |
-    HuggingFace is a backend which uses the huggingface API to run models.
-  tags:
-    - LLM
-    - huggingface
-  license: MIT
- !!merge <<: *huggingface
-  name: "huggingface-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
- !!merge <<: *local-store
-  name: "local-store-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
- !!merge <<: *silero-vad
-  name: "silero-vad-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
- !!merge <<: *piper
-  name: "piper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-piper"
-## llama-cpp
- !!merge <<: *llamacpp
-  name: "darwin-x86-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-darwin-x86-llama-cpp"
- !!merge <<: *llamacpp
-  name: "darwin-x86-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-darwin-x86-llama-cpp"
- !!merge <<: *llamacpp
-  name: "nvidia-l4t-arm64-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp"
- !!merge <<: *llamacpp
-  name: "nvidia-l4t-arm64-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp"
- !!merge <<: *llamacpp
-  name: "cpu-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp"
- !!merge <<: *llamacpp
-  name: "cpu-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp"
- !!merge <<: *llamacpp
-  name: "cuda11-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-llama-cpp"
- !!merge <<: *llamacpp
-  name: "cuda12-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp"
- !!merge <<: *llamacpp
-  name: "rocm-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp"
- !!merge <<: *llamacpp
-  name: "intel-sycl-f32-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-llama-cpp"
- !!merge <<: *llamacpp
-  name: "intel-sycl-f16-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp"
- !!merge <<: *llamacpp
-  name: "vulkan-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-llama-cpp"
- !!merge <<: *llamacpp
-  name: "vulkan-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-llama-cpp"
- !!merge <<: *llamacpp
-  name: "metal-llama-cpp"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp"
- !!merge <<: *llamacpp
-  name: "metal-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp"
- !!merge <<: *llamacpp
-  name: "cuda11-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-llama-cpp"
- !!merge <<: *llamacpp
-  name: "cuda12-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-llama-cpp"
- !!merge <<: *llamacpp
-  name: "rocm-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-llama-cpp"
- !!merge <<: *llamacpp
-  name: "intel-sycl-f32-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-llama-cpp"
- !!merge <<: *llamacpp
-  name: "intel-sycl-f16-llama-cpp-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp"
-## whisper
- !!merge <<: *whispercpp
-  name: "nvidia-l4t-arm64-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-whisper"
- !!merge <<: *whispercpp
-  name: "nvidia-l4t-arm64-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper"
- !!merge <<: *whispercpp
-  name: "cpu-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper"
- !!merge <<: *whispercpp
-  name: "cpu-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisper"
- !!merge <<: *whispercpp
-  name: "cuda11-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-whisper"
- !!merge <<: *whispercpp
-  name: "cuda12-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisper"
- !!merge <<: *whispercpp
-  name: "rocm-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-whisper"
- !!merge <<: *whispercpp
-  name: "intel-sycl-f32-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-whisper"
- !!merge <<: *whispercpp
-  name: "intel-sycl-f16-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-whisper"
- !!merge <<: *whispercpp
-  name: "vulkan-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-whisper"
- !!merge <<: *whispercpp
-  name: "vulkan-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-whisper"
- !!merge <<: *whispercpp
-  name: "metal-whisper"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisper"
- !!merge <<: *whispercpp
-  name: "metal-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisper"
- !!merge <<: *whispercpp
-  name: "cuda11-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-whisper"
- !!merge <<: *whispercpp
-  name: "cuda12-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisper"
- !!merge <<: *whispercpp
-  name: "rocm-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-whisper"
- !!merge <<: *whispercpp
-  name: "intel-sycl-f32-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-whisper"
- !!merge <<: *whispercpp
-  name: "intel-sycl-f16-whisper-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-whisper"
-## stablediffusion-ggml
- !!merge <<: *stablediffusionggml
-  name: "cpu-stablediffusion-ggml"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "cpu-stablediffusion-ggml-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "vulkan-stablediffusion-ggml"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "vulkan-stablediffusion-ggml-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "cuda12-stablediffusion-ggml"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "intel-sycl-f32-stablediffusion-ggml"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "intel-sycl-f16-stablediffusion-ggml"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "cuda11-stablediffusion-ggml"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "cuda12-stablediffusion-ggml-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "intel-sycl-f32-stablediffusion-ggml-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "intel-sycl-f16-stablediffusion-ggml-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "cuda11-stablediffusion-ggml-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "nvidia-l4t-arm64-stablediffusion-ggml-development"
-  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml
-  name: "nvidia-l4t-arm64-stablediffusion-ggml"
-  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml"
-# vllm
 - !!merge <<: *vllm
  name: "vllm-development"
  capabilities:
    nvidia: "cuda12-vllm-development"
    amd: "rocm-vllm-development"
    intel: "intel-sycl-f16-vllm-development"
+- !!merge <<: *vllm
+  name: "cuda11-vllm"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm"
 - !!merge <<: *vllm
  name: "cuda12-vllm"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm"
@@ -515,6 +53,9 @@
 - !!merge <<: *vllm
  name: "intel-sycl-f16-vllm"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-vllm"
+- !!merge <<: *vllm
+  name: "cuda11-vllm-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-vllm"
 - !!merge <<: *vllm
  name: "cuda12-vllm-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm"
@@ -528,6 +69,13 @@
  name: "intel-sycl-f16-vllm-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
 ## Rerankers
+- &rerankers
+  name: "rerankers"
+  alias: "rerankers"
+  capabilities:
+    nvidia: "cuda12-rerankers"
+    intel: "intel-sycl-f16-rerankers"
+    amd: "rocm-rerankers"
 - !!merge <<: *rerankers
  name: "rerankers-development"
  capabilities:
@@ -565,6 +113,23 @@
  name: "intel-sycl-f16-rerankers-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers"
 ## Transformers
+- &transformers
+  name: "transformers"
+  icon: https://camo.githubusercontent.com/26569a27b8a30a488dd345024b71dbc05da7ff1b2ba97bb6080c9f1ee0f26cc7/68747470733a2f2f68756767696e67666163652e636f2f64617461736574732f68756767696e67666163652f646f63756d656e746174696f6e2d696d616765732f7265736f6c76652f6d61696e2f7472616e73666f726d6572732f7472616e73666f726d6572735f61735f615f6d6f64656c5f646566696e6974696f6e2e706e67
+  alias: "transformers"
+  license: apache-2.0
+  description: |
+    Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer vision, audio, video, and multimodal model, for both inference and training.
+    It centralizes the model definition so that this definition is agreed upon across the ecosystem. transformers is the pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...), and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from transformers.
+  urls:
+    - https://github.com/huggingface/transformers
+  tags:
+    - text-to-text
+    - multimodal
+  capabilities:
+    nvidia: "cuda12-transformers"
+    intel: "intel-sycl-f16-transformers"
+    amd: "rocm-transformers"
 - !!merge <<: *transformers
  name: "transformers-development"
  capabilities:
@@ -602,6 +167,22 @@
  name: "intel-sycl-f16-transformers-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-transformers"
 ## Diffusers
+- &diffusers
+  icon: https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg
+  description: |
+    🤗 Diffusers is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Whether you're looking for a simple inference solution or training your own diffusion models, 🤗 Diffusers is a modular toolbox that supports both.
+  urls:
+    - https://github.com/huggingface/diffusers
+  tags:
+    - image-generation
+    - video-generation
+    - diffusion-models
+  license: apache-2.0
+  alias: "diffusers"
+  capabilities:
+    nvidia: "cuda12-diffusers"
+    intel: "intel-sycl-f32-diffusers"
+    amd: "rocm-diffusers"
 - !!merge <<: *diffusers
  name: "diffusers-development"
  capabilities:
@@ -633,6 +214,22 @@
  name: "intel-sycl-f32-diffusers-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-diffusers"
  ## exllama2
+- &exllama2
+  name: "exllama2"
+  urls:
+    - https://github.com/turboderp-org/exllamav2
+  tags:
+    - text-to-text
+    - LLM
+    - EXL2
+  license: MIT
+  description: |
+    ExLlamaV2 is an inference library for running local LLMs on modern consumer GPUs.
+  alias: "exllama2"
+  capabilities:
+    nvidia: "cuda12-exllama2"
+    intel: "intel-sycl-f32-exllama2"
+    amd: "rocm-exllama2"
 - !!merge <<: *exllama2
  name: "exllama2-development"
  capabilities:
@@ -652,6 +249,24 @@
  name: "cuda12-exllama2-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2"
 ## kokoro
+- &kokoro
+  icon: https://avatars.githubusercontent.com/u/166769057?v=4
+  description: |
+    Kokoro is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient. With Apache-licensed weights, Kokoro can be deployed anywhere from production environments to personal projects.
+  urls:
+    - https://huggingface.co/hexgrad/Kokoro-82M
+    - https://github.com/hexgrad/kokoro
+  tags:
+    - text-to-speech
+    - TTS
+    - LLM
+  license: apache-2.0
+  alias: "kokoro"
+  name: "kokoro"
+  capabilities:
+    nvidia: "cuda12-kokoro"
+    intel: "intel-sycl-f32-kokoro"
+    amd: "rocm-kokoro"
 - !!merge <<: *kokoro
  name: "kokoro-development"
  capabilities:
@@ -689,6 +304,22 @@
  name: "rocm-kokoro"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro"
 ## faster-whisper
+- &faster-whisper
+  icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
+  description: |
+    faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models.
+    This implementation is up to 4 times faster than openai/whisper for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU.
+  urls:
+    - https://github.com/SYSTRAN/faster-whisper
+  tags:
+    - speech-to-text
+    - Whisper
+  license: MIT
+  name: "faster-whisper"
+  capabilities:
+    nvidia: "cuda12-faster-whisper"
+    intel: "intel-sycl-f32-faster-whisper"
+    amd: "rocm-faster-whisper"
 - !!merge <<: *faster-whisper
  name: "faster-whisper-development"
  capabilities:
@@ -717,7 +348,28 @@
  name: "sycl-f16-faster-whisper-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-faster-whisper"
 ## coqui
+- &coqui
+  urls:
+    - https://github.com/idiap/coqui-ai-TTS
+  description: |
+    🐸 Coqui TTS is a library for advanced Text-to-Speech generation.

+    🚀 Pretrained models in +1100 languages.
+
+    🛠️ Tools for training new models and fine-tuning existing models in any language.
+
+    📚 Utilities for dataset analysis and curation.
+  tags:
+    - text-to-speech
+    - TTS
+  license: mpl-2.0
+  name: "coqui"
+  alias: "coqui"
+  capabilities:
+    nvidia: "cuda12-coqui"
+    intel: "intel-sycl-f32-coqui"
+    amd: "rocm-coqui"
+  icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
 - !!merge <<: *coqui
  name: "coqui-development"
  capabilities:
@@ -755,6 +407,22 @@
  name: "rocm-coqui"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui"
 ## bark
+- &bark
+  urls:
+    - https://github.com/suno-ai/bark
+  description: |
+    Bark is a transformer-based text-to-audio model created by Suno. Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. The model can also produce nonverbal communications like laughing, sighing and crying. To support the research community, we are providing access to pretrained model checkpoints, which are ready for inference and available for commercial use.
+  tags:
+    - text-to-speech
+    - TTS
+  license: MIT
+  name: "bark"
+  alias: "bark"
+  capabilities:
+    cuda: "cuda12-bark"
+    intel: "intel-sycl-f32-bark"
+    rocm: "rocm-bark"
+  icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4
 - !!merge <<: *bark
  name: "bark-development"
  capabilities:
@@ -791,11 +459,50 @@
 - !!merge <<: *bark
  name: "cuda12-bark-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark"
+- &barkcpp
+  urls:
+    - https://github.com/PABannier/bark.cpp
+  description: |
+      With bark.cpp, our goal is to bring real-time realistic multilingual text-to-speech generation to the community.
+
+      Plain C/C++ implementation without dependencies
+      AVX, AVX2 and AVX512 for x86 architectures
+      CPU and GPU compatible backends
+      Mixed F16 / F32 precision
+      4-bit, 5-bit and 8-bit integer quantization
+      Metal and CUDA backends
+
+      Models supported
+
+          Bark Small
+          Bark Large
+  tags:
+    - text-to-speech
+    - TTS
+  license: MIT
+  icon: https://github.com/PABannier/bark.cpp/raw/main/assets/banner.png
+  name: "bark-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-bark-cpp"
+  alias: "bark-cpp"
 - !!merge <<: *barkcpp
  name: "bark-cpp-development"
  uri: "quay.io/go-skynet/local-ai-backends:master-bark-cpp"
  alias: "bark-cpp"
 ## chatterbox
+- &chatterbox
+  urls:
+    - https://github.com/resemble-ai/chatterbox
+  description: |
+    Resemble AI's first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.
+    Whether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out.
+  tags:
+    - text-to-speech
+    - TTS
+  license: MIT
+  icon: https://private-user-images.githubusercontent.com/660224/448166653-bd8c5f03-e91d-4ee5-b680-57355da204d1.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTAxOTE0MDAsIm5iZiI6MTc1MDE5MTEwMCwicGF0aCI6Ii82NjAyMjQvNDQ4MTY2NjUzLWJkOGM1ZjAzLWU5MWQtNGVlNS1iNjgwLTU3MzU1ZGEyMDRkMS5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjUwNjE3JTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI1MDYxN1QyMDExNDBaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT1hMmI1NGY3OGFiZTlhNGFkNTVlYTY4NTIwMWEzODRiZGE4YzdhNGQ5MGNhNzE3MDYyYTA2NDIxYTkyYzhiODkwJlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.mR9kM9xX0TdzPuSpuspCllHYQiq79dFQ2rtuNvjrl6w
+  name: "chatterbox"
+  capabilities:
+    nvidia: "cuda12-chatterbox"
 - !!merge <<: *chatterbox
  name: "chatterbox-development"
  capabilities:
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -9,7 +9,9 @@ import (
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
+	"github.com/mudler/LocalAI/pkg/assets"

+	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/mudler/LocalAI/pkg/model"
 	pkgStartup "github.com/mudler/LocalAI/pkg/startup"
 	"github.com/mudler/LocalAI/pkg/xsysinfo"
@@ -101,6 +103,23 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}

+	if options.AssetsDestination != "" {
+		// Extract files from the embedded FS
+		err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
+		log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
+		if err != nil {
+			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
+		}
+	}
+
+	if options.LibPath != "" {
+		// If there is a lib directory, set LD_LIBRARY_PATH to include it
+		err := library.LoadExternal(options.LibPath)
+		if err != nil {
+			log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
+		}
+	}
+
 	// turn off any process that was started by GRPC if the context is canceled
 	go func() {
 		<-options.Context.Done()
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -20,6 +20,7 @@ func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...
 	defOpts := []model.Option{
 		model.WithBackendString(c.Backend),
 		model.WithModel(c.Model),
+		model.WithAssetDir(so.AssetsDestination),
 		model.WithContext(so.Context),
 		model.WithModelID(name),
 	}
--- a/core/backend/stores.go
+++ b/core/backend/stores.go
@@ -7,12 +7,14 @@ import (
 	"github.com/mudler/LocalAI/pkg/model"
 )

-func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string, backend string) (grpc.Backend, error) {
-	if backend == "" {
-		backend = model.LocalStoreBackend
+func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
+	if storeName == "" {
+		storeName = "default"
 	}
+
 	sc := []model.Option{
-		model.WithBackendString(backend),
+		model.WithBackendString(model.LocalStoreBackend),
+		model.WithAssetDir(appConfig.AssetsDestination),
 		model.WithModel(storeName),
 	}

--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -21,7 +21,7 @@ func ModelTTS(
 	appConfig *config.ApplicationConfig,
 	backendConfig config.BackendConfig,
 ) (string, *proto.Result, error) {
-	opts := ModelOptions(backendConfig, appConfig)
+	opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
 	ttsModel, err := loader.Load(opts...)
 	if err != nil {
 		return "", nil, err
--- a/core/cli/backends.go
+++ b/core/cli/backends.go
@@ -8,6 +8,7 @@ import (
 	"github.com/mudler/LocalAI/core/config"

 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/startup"
 	"github.com/rs/zerolog/log"
 	"github.com/schollz/progressbar/v3"
@@ -22,6 +23,12 @@ type BackendsList struct {
 	BackendsCMDFlags `embed:""`
 }

+type BackendsInstallSingle struct {
+	InstallArgs []string `arg:"" optional:"" name:"backend" help:"Backend images to install"`
+
+	BackendsCMDFlags `embed:""`
+}
+
 type BackendsInstall struct {
 	BackendArgs []string `arg:"" optional:"" name:"backends" help:"Backend configuration URLs to load"`

@@ -35,9 +42,36 @@ type BackendsUninstall struct {
 }

 type BackendsCMD struct {
-	List      BackendsList      `cmd:"" help:"List the backends available in your galleries" default:"withargs"`
-	Install   BackendsInstall   `cmd:"" help:"Install a backend from the gallery"`
-	Uninstall BackendsUninstall `cmd:"" help:"Uninstall a backend"`
+	List          BackendsList          `cmd:"" help:"List the backends available in your galleries" default:"withargs"`
+	Install       BackendsInstall       `cmd:"" help:"Install a backend from the gallery"`
+	InstallSingle BackendsInstallSingle `cmd:"" help:"Install a single backend from the gallery"`
+	Uninstall     BackendsUninstall     `cmd:"" help:"Uninstall a backend"`
+}
+
+func (bi *BackendsInstallSingle) Run(ctx *cliContext.Context) error {
+	for _, backend := range bi.InstallArgs {
+		progressBar := progressbar.NewOptions(
+			1000,
+			progressbar.OptionSetDescription(fmt.Sprintf("downloading backend %s", backend)),
+			progressbar.OptionShowBytes(false),
+			progressbar.OptionClearOnFinish(),
+		)
+		progressCallback := func(fileName string, current string, total string, percentage float64) {
+			v := int(percentage * 10)
+			err := progressBar.Set(v)
+			if err != nil {
+				log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
+			}
+		}
+
+		if err := gallery.InstallBackend(bi.BackendsPath, &gallery.GalleryBackend{
+			URI: backend,
+		}, progressCallback); err != nil {
+			return err
+		}
+	}
+
+	return nil
 }

 func (bl *BackendsList) Run(ctx *cliContext.Context) error {
@@ -82,6 +116,23 @@ func (bi *BackendsInstall) Run(ctx *cliContext.Context) error {
 			}
 		}

+		backendURI := downloader.URI(backendName)
+
+		if !backendURI.LooksLikeOCI() {
+			backends, err := gallery.AvailableBackends(galleries, bi.BackendsPath)
+			if err != nil {
+				return err
+			}
+
+			backend := gallery.FindGalleryElement(backends, backendName, bi.BackendsPath)
+			if backend == nil {
+				log.Error().Str("backend", backendName).Msg("backend not found")
+				return fmt.Errorf("backend not found: %s", backendName)
+			}
+
+			log.Info().Str("backend", backendName).Str("license", backend.License).Msg("installing backend")
+		}
+
 		err := startup.InstallExternalBackends(galleries, bi.BackendsPath, progressCallback, backendName)
 		if err != nil {
 			return err
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Ettore Di Giacinto	057d5c25f1	test CI (remove me) Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-07-04 18:49:15 +02:00
Ettore Di Giacinto	cdf70db0d6	chore(deps): switch to ubuntu 24.04 Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2025-07-04 18:48:53 +02:00