wip: try to let the JSON grammar return strings as well

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
feat: auto select llama-cpp cuda runtime (#2306)
2026-07-07 06:49:49 -04:00 · 2024-05-14 22:58:27 +02:00 · 2024-05-14 19:40:18 +02:00 · 2024-05-14 09:39:20 +02:00 · 2024-05-14 00:32:32 +02:00 · 2024-05-13 22:25:14 +00:00
311 changed files with 8231 additions and 3868 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,6 +6,11 @@ examples/chatbot-ui/models
 examples/rwkv/models
 examples/**/models
 Dockerfile*
+__pycache__

 # SonarQube
-.scannerwork
+.scannerwork
+
+# backend virtual environments
+**/venv
+backend/python/**/source
--- a/.env
+++ b/.env
@@ -10,7 +10,7 @@
 #
 ## Define galleries.
 ## models will to install will be visible in `/models/available`
-# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
+# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]

 ## CORS settings
 # LOCALAI_CORS=true
@@ -86,4 +86,4 @@
 # LOCALAI_WATCHDOG_BUSY=true
 #
 # Time in duration format (e.g. 1h30m) after which a backend is considered busy
-# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
+# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
--- a/.github/bump_docs.sh
+++ b/.github/bump_docs.sh
@@ -2,6 +2,6 @@
 set -xe
 REPO=$1

-LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
+LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')

 cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
--- a/.github/checksum_checker.sh
+++ b/.github/checksum_checker.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# This script needs yq and huggingface_hub to be installed.
+# To install huggingface_hub, run: pip install huggingface_hub
+
+# Path to the input YAML file
+input_yaml=$1
+
+# Download one file, hash it and sync its sha256 in $input_yaml. Args: model_name file_name uri old_checksum idx
+function check_and_update_checksum() {
+    model_name="$1"
+    file_name="$2"
+    uri="$3"
+    old_checksum="$4"
+    idx="$5"
+
+    # Download the file and calculate new checksum using Python (URI is passed via argv, never spliced into the code)
+    new_checksum=$(python3 -c "
+import hashlib
+from huggingface_hub import hf_hub_download
+import requests
+import sys
+import os
+
+uri = sys.argv[1]
+file_name = uri.split('/')[-1]
+
+# Function to parse the URI and determine download method
+def parse_uri(uri):
+    if uri.startswith('huggingface://'):
+        repo_id = uri.split('://')[1]
+        return 'huggingface', repo_id.rsplit('/', 1)[0]
+    elif 'huggingface.co' in uri:
+        parts = uri.split('/resolve/')
+        if len(parts) > 1:
+            repo_path = parts[0].split('https://huggingface.co/')[-1]
+            return 'huggingface', repo_path
+    return 'direct', uri
+
+def calculate_sha256(file_path):
+    sha256_hash = hashlib.sha256()
+    with open(file_path, 'rb') as f:
+        for byte_block in iter(lambda: f.read(4096), b''):
+            sha256_hash.update(byte_block)
+    return sha256_hash.hexdigest()
+
+download_type, repo_id_or_url = parse_uri(uri)
+
+# Decide download method based on URI type
+if download_type == 'huggingface':
+    try:
+        file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
+    except Exception as e:
+        print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
+        sys.exit(2)
+else:
+    response = requests.get(repo_id_or_url)
+    if response.status_code == 200:
+        with open(file_name, 'wb') as f:
+            f.write(response.content)
+        file_path = file_name
+    elif response.status_code == 404:
+        print(f'File not found: {response.status_code}', file=sys.stderr)
+        sys.exit(2)
+    else:
+        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
+        sys.exit(1)
+
+print(calculate_sha256(file_path))
+# Clean up the downloaded file
+os.remove(file_path)
+" "$uri")
+    result=$?  # read python's exit status right away, before any other command overwrites it
+    if [[ $result -eq 2 ]]; then
+        echo "File not found, deleting entry for $file_name..."
+        # yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
+        return
+    elif [[ $result -ne 0 ]]; then
+        echo "Error downloading file $file_name. Skipping..."
+        return
+    elif [[ "$new_checksum" == "" ]]; then
+        echo "Error calculating checksum for $file_name. Skipping..."
+        return
+    fi
+
+    echo "Checksum for $file_name: $new_checksum"
+
+    # Compare and update the YAML file if checksums do not match
+    if [[ "$old_checksum" != "$new_checksum" ]]; then
+        echo "Checksum mismatch for $file_name. Updating..."
+        yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
+        yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
+    else
+        echo "Checksum match for $file_name. No update needed."
+    fi
+}
+
+# Walk every entry of the YAML list and check each of its files; $i is passed on as the entry index
+len=$(yq eval '. | length' "$input_yaml")
+for ((i=0; i<$len; i++))
+do
+    name=$(yq eval ".[$i].name" "$input_yaml")
+    files_len=$(yq eval ".[$i].files | length" "$input_yaml")
+    for ((j=0; j<$files_len; j++))
+    do
+        filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
+        uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
+        checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
+        echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
+        check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
+    done
+done
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -8,6 +8,11 @@ kind/documentation:
  - changed-files:
    - any-glob-to-any-file: '*.md'

+area/ai-model:
+- any:
+  - changed-files:
+    - any-glob-to-any-file: 'gallery/*'
+
 examples:
 - any:
  - changed-files:
@@ -16,4 +21,4 @@ examples:
 ci:
 - any:
  - changed-files:
-    - any-glob-to-any-file: '.github/*'
+    - any-glob-to-any-file: '.github/*'
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -0,0 +1,47 @@
+name: Check if checksums are up-to-date
+on:
+  schedule:
+    - cron: 0 20 * * *
+  workflow_dispatch:
+jobs:
+  checksum_check:
+    runs-on: arc-runner-set
+    steps:
+      - name: Force Install GIT latest
+        run: |
+          sudo apt-get update \
+          && sudo apt-get install -y software-properties-common \
+          && sudo apt-get update \
+          && sudo add-apt-repository -y ppa:git-core/ppa \
+          && sudo apt-get update \
+          && sudo apt-get install -y git
+      - uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y pip wget
+          sudo pip install --upgrade pip 
+          pip install huggingface_hub
+      - name: 'Setup yq'
+        uses: dcarbone/install-yq-action@v1.1.1
+        with:
+          version: 'v4.43.1'
+          download-compressed: true
+          force: true
+
+      - name: Checksum checker 🔧
+        run: |
+          export HF_HOME=/hf_cache
+          sudo mkdir /hf_cache
+          sudo chmod 777 /hf_cache
+          bash .github/checksum_checker.sh gallery/index.yaml
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v6
+        with:
+          token: ${{ secrets.UPDATE_BOT_TOKEN }}
+          push-to-fork: ci-forks/LocalAI
+          commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
+          title: 'models(gallery): :arrow_up: update checksum'
+          branch: "update/checksum"
+          body: Updating checksums in gallery/index.yaml
+          signoff: true
--- a/.github/workflows/dependabot_auto.yml
+++ b/.github/workflows/dependabot_auto.yml
@@ -14,7 +14,7 @@ jobs:
    steps:
      - name: Dependabot metadata
        id: metadata
-        uses: dependabot/fetch-metadata@v2.0.0
+        uses: dependabot/fetch-metadata@v2.1.0
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          skip-commit-verification: true
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -1,7 +1,10 @@
 name: 'generate and publish GRPC docker caches'

 on:
- workflow_dispatch
+  workflow_dispatch:
+  push:
+    branches:
+      - master

 concurrency:
  group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -80,11 +83,12 @@ jobs:
          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
          build-args: |
            GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
-            MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.58.0
+            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
+            GRPC_VERSION=v1.63.0
          context: .
          file: ./Dockerfile
          cache-to: type=gha,ignore-error=true
+          cache-from: type=gha
          target: grpc
          platforms: ${{ matrix.platforms }}
          push: false
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -61,14 +61,14 @@ jobs:
            tag-suffix: '-hipblas'
            ffmpeg: 'false'
            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: 'sycl-f16-ffmpeg'
            ffmpeg: 'true'
@@ -110,7 +110,7 @@ jobs:
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: 'sycl-f16-ffmpeg-core'
            ffmpeg: 'true'
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -129,7 +129,7 @@ jobs:
            ffmpeg: 'true'
            image-type: 'extras'
            aio: "-aio-gpu-hipblas"
-            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
            latest-image: 'latest-gpu-hipblas'
            latest-image-aio: 'latest-aio-gpu-hipblas'
@@ -141,14 +141,14 @@ jobs:
            tag-suffix: '-hipblas'
            ffmpeg: 'false'
            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'auto'
-            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f16-ffmpeg'
            ffmpeg: 'true'
@@ -161,7 +161,7 @@ jobs:
          - build-type: 'sycl_f32'
            platforms: 'linux/amd64'
            tag-latest: 'auto'
-            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f32-ffmpeg'
            ffmpeg: 'true'
@@ -175,7 +175,7 @@ jobs:
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f16-core'
            ffmpeg: 'false'
@@ -185,7 +185,7 @@ jobs:
          - build-type: 'sycl_f32'
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f32-core'
            ffmpeg: 'false'
@@ -195,7 +195,7 @@ jobs:
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f16-ffmpeg-core'
            ffmpeg: 'true'
@@ -205,7 +205,7 @@ jobs:
          - build-type: 'sycl_f32'
            platforms: 'linux/amd64'
            tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f32-ffmpeg-core'
            ffmpeg: 'true'
@@ -218,7 +218,7 @@ jobs:
            tag-suffix: '-hipblas-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
@@ -228,7 +228,7 @@ jobs:
            tag-suffix: '-hipblas-core'
            ffmpeg: 'false'
            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -201,30 +201,14 @@ jobs:
          username: ${{ secrets.quayUsername }}
          password: ${{ secrets.quayPassword }}

-      - name: Cache GRPC
+      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          builder: ${{ steps.buildx.outputs.name }}
          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
          # This means that even the MAKEFLAGS have to be an EXACT match.
          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          build-args: |
-            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
-            MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.58.0
-          context: .
-          file: ./Dockerfile
-          cache-from: type=gha
-          target: grpc
-          platforms: ${{ inputs.platforms }}
-          push: false
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
+          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
@@ -232,6 +216,9 @@ jobs:
            FFMPEG=${{ inputs.ffmpeg }}
            IMAGE_TYPE=${{ inputs.image-type }}
            BASE_IMAGE=${{ inputs.base-image }}
+            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
+            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
+            GRPC_VERSION=v1.63.0
            MAKEFLAGS=${{ inputs.makeflags }}
          context: .
          file: ./Dockerfile
@@ -241,14 +228,6 @@ jobs:
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

-      - name: Inspect image
-        if: github.event_name != 'pull_request'
-        run: |
-          docker pull localai/localai:${{ steps.meta.outputs.version }}
-          docker image inspect localai/localai:${{ steps.meta.outputs.version }}
-          docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-          docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-
      - name: Build and push AIO image
        if: inputs.aio != ''
        uses: docker/build-push-action@v5
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,11 +1,11 @@
 name: Build and Release

-on: 
+on:
 - push
 - pull_request

 env:
-  GRPC_VERSION: v1.58.0
+  GRPC_VERSION: v1.63.0

 permissions:
  contents: write
@@ -16,19 +16,6 @@ concurrency:

 jobs:
  build-linux:
-    strategy:
-      matrix:
-        include:
-          - build: 'avx2'
-            defines: ''
-          - build: 'avx'
-            defines: '-DLLAMA_AVX2=OFF'
-          - build: 'avx512'
-            defines: '-DLLAMA_AVX512=ON'
-          - build: 'cuda12'
-            defines: ''
-          - build: 'cuda11'
-            defines: ''
    runs-on: ubuntu-latest
    steps:
      - name: Clone
@@ -44,17 +31,13 @@ jobs:
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg protobuf-compiler
      - name: Install CUDA Dependencies
-        if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
        run: |
-          if [ "${{ matrix.build }}" == "cuda12" ]; then
-            export CUDA_VERSION=12-3
-          else
-            export CUDA_VERSION=11-7
-          fi
          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+        env:
+          CUDA_VERSION: 12-3
      - name: Cache grpc
        id: cache-grpc
        uses: actions/cache@v4
@@ -73,23 +56,15 @@ jobs:
          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
      - name: Build
        id: build
-        env:
-          CMAKE_ARGS: "${{ matrix.defines }}"
-          BUILD_ID: "${{ matrix.build }}"
        run: |
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
          export PATH=$PATH:$GOPATH/bin
-          if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
-            export BUILD_TYPE=cublas
-            export PATH=/usr/local/cuda/bin:$PATH
-            make dist
-          else
-            STATIC=true make dist
-          fi
+          export PATH=/usr/local/cuda/bin:$PATH
+          make dist
      - uses: actions/upload-artifact@v4
        with:
-          name: LocalAI-linux-${{ matrix.build }}
+          name: LocalAI-linux
          path: release/
      - name: Release
        uses: softprops/action-gh-release@v2
@@ -124,63 +99,7 @@ jobs:
          name: stablediffusion
          path: release/

-  build-macOS:
-    strategy:
-      matrix:
-        include:
-          - build: 'avx2'
-            defines: ''
-          - build: 'avx'
-            defines: '-DLLAMA_AVX2=OFF'
-          - build: 'avx512'
-            defines: '-DLLAMA_AVX512=ON'
-    runs-on: macOS-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
-      - name: Build
-        id: build
-        env:
-          CMAKE_ARGS: "${{ matrix.defines }}"
-          BUILD_ID: "${{ matrix.build }}"
-        run: |
-          export C_INCLUDE_PATH=/usr/local/include
-          export CPLUS_INCLUDE_PATH=/usr/local/include
-          export PATH=$PATH:$GOPATH/bin
-          make dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-MacOS-${{ matrix.build }}
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-
-
  build-macOS-arm64:
-    strategy:
-      matrix:
-        include:
-          - build: 'avx2'
-            defines: ''
-          - build: 'avx'
-            defines: '-DLLAMA_AVX2=OFF'
-          - build: 'avx512'
-            defines: '-DLLAMA_AVX512=ON'
    runs-on: macos-14
    steps:
      - name: Clone
@@ -198,9 +117,6 @@ jobs:
          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
      - name: Build
        id: build
-        env:
-          CMAKE_ARGS: "${{ matrix.defines }}"
-          BUILD_ID: "${{ matrix.build }}"
        run: |
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
@@ -208,7 +124,7 @@ jobs:
          make dist
      - uses: actions/upload-artifact@v4
        with:
-          name: LocalAI-MacOS-arm64-${{ matrix.build }}
+          name: LocalAI-MacOS-arm64
          path: release/
      - name: Release
        uses: softprops/action-gh-release@v2
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -25,22 +25,14 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-              gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
          
-          sudo rm -rfv /usr/bin/conda || true
-
      - name: Test transformers
        run: |
-           export PATH=$PATH:/opt/conda/bin
           make --jobs=5 --output-sync=target -C backend/python/transformers
           make --jobs=5 --output-sync=target -C backend/python/transformers test

@@ -55,22 +47,14 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-              gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
          
-          sudo rm -rfv /usr/bin/conda || true
-
      - name: Test sentencetransformers
        run: |
-           export PATH=$PATH:/opt/conda/bin
           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test

@@ -86,22 +70,14 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-              gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
-          
-          sudo rm -rfv /usr/bin/conda || true
+          pip install --user grpcio-tools==1.63.0

      - name: Test rerankers
        run: |
-           export PATH=$PATH:/opt/conda/bin
           make --jobs=5 --output-sync=target -C backend/python/rerankers
           make --jobs=5 --output-sync=target -C backend/python/rerankers test

@@ -115,25 +91,16 @@ jobs:
      - name: Dependencies
        run: |
          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-              gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
+          sudo apt-get install -y build-essential ffmpeg
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
-          
-          sudo rm -rfv /usr/bin/conda || true
-
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          pip install --user grpcio-tools==1.63.0
      - name: Test diffusers
        run: |
-           export PATH=$PATH:/opt/conda/bin
-           make --jobs=5 --output-sync=target -C backend/python/diffusers
-           make --jobs=5 --output-sync=target -C backend/python/diffusers test
+          make --jobs=5 --output-sync=target -C backend/python/diffusers
+          make --jobs=5 --output-sync=target -C backend/python/diffusers test

  tests-parler-tts:
    runs-on: ubuntu-latest
@@ -146,22 +113,14 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-              gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
-          
-          sudo rm -rfv /usr/bin/conda || true
+          pip install --user grpcio-tools==1.63.0

      - name: Test parler-tts
        run: |
-           export PATH=$PATH:/opt/conda/bin
           make --jobs=5 --output-sync=target -C backend/python/parler-tts
           make --jobs=5 --output-sync=target -C backend/python/parler-tts test

@@ -176,22 +135,14 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-              gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
-          
-          sudo rm -rfv /usr/bin/conda || true
+          pip install --user grpcio-tools==1.63.0

      - name: Test transformers-musicgen
        run: |
-           export PATH=$PATH:/opt/conda/bin
           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test

@@ -208,22 +159,14 @@ jobs:
  #       run: |
  #         sudo apt-get update
  #         sudo apt-get install build-essential ffmpeg
-  #         curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-  #            sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-  #             gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-  #            sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-  #            sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-  #            sudo apt-get update && \
-  #            sudo apt-get install -y conda
+  #         # Install UV
+  #         curl -LsSf https://astral.sh/uv/install.sh | sh
  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools
-          
-  #         sudo rm -rfv /usr/bin/conda || true
+  #         pip install --user grpcio-tools==1.63.0

  #     - name: Test petals
  #       run: |
-  #          export PATH=$PATH:/opt/conda/bin
  #          make --jobs=5 --output-sync=target -C backend/python/petals
  #          make --jobs=5 --output-sync=target -C backend/python/petals test

@@ -280,22 +223,14 @@ jobs:
  #       run: |
  #         sudo apt-get update
  #         sudo apt-get install build-essential ffmpeg
-  #         curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-  #            sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-  #             gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-  #            sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-  #            sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-  #            sudo apt-get update && \
-  #            sudo apt-get install -y conda
+  #         # Install UV
+  #         curl -LsSf https://astral.sh/uv/install.sh | sh
  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools
-          
-  #         sudo rm -rfv /usr/bin/conda || true
+  #         pip install --user grpcio-tools==1.63.0

  #     - name: Test bark
  #       run: |
-  #          export PATH=$PATH:/opt/conda/bin
  #          make --jobs=5 --output-sync=target -C backend/python/bark
  #          make --jobs=5 --output-sync=target -C backend/python/bark test

@@ -313,20 +248,13 @@ jobs:
  #       run: |
  #         sudo apt-get update
  #         sudo apt-get install build-essential ffmpeg
-  #         curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-  #            sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-  #             gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-  #            sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-  #            sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-  #            sudo apt-get update && \
-  #            sudo apt-get install -y conda
+  #         # Install UV
+  #         curl -LsSf https://astral.sh/uv/install.sh | sh
  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools
-  #         sudo rm -rfv /usr/bin/conda || true
+  #         pip install --user grpcio-tools==1.63.0
  #     - name: Test vllm
  #       run: |
-  #          export PATH=$PATH:/opt/conda/bin
  #          make --jobs=5 --output-sync=target -C backend/python/vllm
  #          make --jobs=5 --output-sync=target -C backend/python/vllm test
  tests-vallex:
@@ -340,20 +268,13 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-              gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
-          sudo rm -rfv /usr/bin/conda || true
+          pip install --user grpcio-tools==1.63.0
      - name: Test vall-e-x
        run: |
-           export PATH=$PATH:/opt/conda/bin
           make --jobs=5 --output-sync=target -C backend/python/vall-e-x
           make --jobs=5 --output-sync=target -C backend/python/vall-e-x test

@@ -368,19 +289,11 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
-          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-             sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-              gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-             sudo apt-get update && \
-             sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
-          pip install --user grpcio-tools
-          sudo rm -rfv /usr/bin/conda || true
-
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          pip install --user grpcio-tools==1.63.0
      - name: Test coqui
        run: |
-           export PATH=$PATH:/opt/conda/bin
-           make --jobs=5 --output-sync=target -C backend/python/coqui
-           make --jobs=5 --output-sync=target -C backend/python/coqui test
+          make --jobs=5 --output-sync=target -C backend/python/coqui
+          make --jobs=5 --output-sync=target -C backend/python/coqui test
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -10,7 +10,7 @@ on:
      - '*'

 env:
-  GRPC_VERSION: v1.58.0
+  GRPC_VERSION: v1.63.0

 concurrency:
  group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -57,7 +57,7 @@ jobs:
          df -h
      - name: Clone
        uses: actions/checkout@v4
-        with: 
+        with:
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v5
@@ -78,6 +78,8 @@ jobs:
             sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
             sudo apt-get update && \
             sudo apt-get install -y conda
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
          sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
          sudo apt-get install -y libopencv-dev

@@ -85,6 +87,12 @@ jobs:
          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
          rm protoc.zip

+          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+          export CUDACXX=/usr/local/cuda/bin/nvcc
+
          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest

@@ -100,6 +108,8 @@ jobs:
          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
          # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+        env:
+          CUDA_VERSION: 12-3
      - name: Cache grpc
        id: cache-grpc
        uses: actions/cache@v4
@@ -164,7 +174,7 @@ jobs:
          df -h
      - name: Clone
        uses: actions/checkout@v4
-        with: 
+        with:
          submodules: true
      - name: Build images
        run: |
@@ -190,7 +200,7 @@ jobs:
    steps:
      - name: Clone
        uses: actions/checkout@v4
-        with: 
+        with:
          submodules: true
      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v5
@@ -203,7 +213,7 @@ jobs:
      - name: Dependencies
        run: |
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
      - name: Test
        run: |
          export C_INCLUDE_PATH=/usr/local/include
--- a/.gitignore
+++ b/.gitignore
@@ -46,4 +46,7 @@ prepare
 *pb2_grpc.py

 # SonarQube
-.scannerwork
+.scannerwork
+
+# backend virtual environments
+**/venv
--- a/.yamllint
+++ b/.yamllint
@@ -0,0 +1,4 @@
+extends: default
+
+rules:
+    line-length: disable
--- a/303
+++ b/303
@@ -2,41 +2,42 @@ ARG IMAGE_TYPE=extras
 ARG BASE_IMAGE=ubuntu:22.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}

-# extras or core
+# The requirements-core target is common to all images.  Nothing should be placed in requirements-core unless every single build will use it.
 FROM ${BASE_IMAGE} AS requirements-core

 USER root

 ARG GO_VERSION=1.21.7
-ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=11
-ARG CUDA_MINOR_VERSION=7
 ARG TARGETARCH
 ARG TARGETVARIANT

-ENV BUILD_TYPE=${BUILD_TYPE}
 ENV DEBIAN_FRONTEND=noninteractive
 ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"

 ARG GO_TAGS="stablediffusion tinydream tts"

 RUN apt-get update && \
-    apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        cmake \
+        curl \
+        git \
+        python3-pip \
+        python-is-python3 \
+        unzip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --upgrade pip

 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
-ENV PATH $PATH:/usr/local/go/bin
+ENV PATH $PATH:/root/go/bin:/usr/local/go/bin

 # Install grpc compilers
-ENV PATH $PATH:/root/go/bin
 RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest

-# Install protobuf (the version in 22.04 is too old)
-RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-    unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-    rm protoc.zip
-
 # Install grpcio-tools (the version in 22.04 is too old)
 RUN pip install --user grpcio-tools

@@ -47,16 +48,6 @@ RUN update-ca-certificates
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"

-# CuBLAS requirements
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-    apt-get install -y software-properties-common && \
-    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
-    dpkg -i cuda-keyring_1.1-1_all.deb && \
-    rm -f cuda-keyring_1.1-1_all.deb && \
-    apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}  && apt-get clean \
-    ; fi
-
 # Cuda
 ENV PATH /usr/local/cuda/bin:${PATH}

@@ -64,10 +55,12 @@ ENV PATH /usr/local/cuda/bin:${PATH}
 ENV PATH /opt/rocm/bin:${PATH}

 # OpenBLAS requirements and stable diffusion
-RUN apt-get install -y \
-    libopenblas-dev \
-    libopencv-dev \ 
-    && apt-get clean
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        libopenblas-dev \
+        libopencv-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*

 # Set up OpenCV
 RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
@@ -80,56 +73,116 @@ RUN test -n "$TARGETARCH" \
 ###################################
 ###################################

+# The requirements-extras target is for any builds with IMAGE_TYPE=extras. Nothing should be placed in this target unless every IMAGE_TYPE=extras build will use it.
 FROM requirements-core AS requirements-extras

-RUN apt install -y gpg && \
-    curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-    install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-    gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
-    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
-    apt-get update && \
-    apt-get install -y conda && apt-get clean
-
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 ENV PATH="/root/.cargo/bin:${PATH}"
-RUN apt-get install -y python3-pip && apt-get clean
-RUN pip install --upgrade pip

 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-RUN apt-get install -y espeak-ng espeak && apt-get clean
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        espeak-ng \
+        espeak \
+        python3-dev \
+        python3-venv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*

-RUN if [ ! -e /usr/bin/python ]; then \
-	  ln -s /usr/bin/python3 /usr/bin/python \
+###################################
+###################################
+
+# The requirements-drivers target is for BUILD_TYPE specific items.  If you need to install something specific to CUDA, or specific to ROCM, it goes here.
+# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
+FROM requirements-${IMAGE_TYPE} AS requirements-drivers
+
+ARG BUILD_TYPE
+ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MINOR_VERSION=7
+
+ENV BUILD_TYPE=${BUILD_TYPE}
+
+# CuBLAS requirements
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+        apt-get update && \
+        apt-get install -y  --no-install-recommends \
+            software-properties-common && \
+        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+        dpkg -i cuda-keyring_1.1-1_all.deb && \
+        rm -f cuda-keyring_1.1-1_all.deb && \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
+    ; fi
+
+# If we are building with clblas support, we need the libraries for the builds
+RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            libclblast-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
+    ; fi
+
+RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            hipblas-dev \
+            rocblas-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* && \
+        # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
+        # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
+        ldconfig \
    ; fi

 ###################################
 ###################################

+# The grpc target does one thing, it builds and installs GRPC.  This is in its own layer so that it can be effectively cached by CI.
+# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
 FROM ${GRPC_BASE_IMAGE} AS grpc

-ARG MAKEFLAGS
+# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
+ARG GRPC_MAKEFLAGS="-j4 -Otarget"
 ARG GRPC_VERSION=v1.58.0

-ENV MAKEFLAGS=${MAKEFLAGS}
+ENV MAKEFLAGS=${GRPC_MAKEFLAGS}

 WORKDIR /build

 RUN apt-get update && \
-    apt-get install -y build-essential cmake git  && \
+    apt-get install -y --no-install-recommends \
+        ca-certificates \
+        build-essential \
+        cmake \
+        git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

-RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
-
-WORKDIR /build/grpc/cmake/build
-
-RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
-    make
+# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
+# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
+# and running make install in the target container
+RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+    mkdir -p /build/grpc/cmake/build && \
+    cd /build/grpc/cmake/build && \
+    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
+    make && \
+    make install && \
+    rm -rf /build

 ###################################
 ###################################

-FROM requirements-${IMAGE_TYPE} AS builder
+# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
+# Adjustments to the build process should likely be made here.
+FROM requirements-drivers AS builder

 ARG GO_TAGS="stablediffusion tts"
 ARG GRPC_BACKENDS
@@ -148,46 +201,42 @@ COPY . .
 COPY .git .
 RUN echo "GO_TAGS: $GO_TAGS"

-RUN apt-get update && \
-    apt-get install -y build-essential cmake git  && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
 RUN make prepare

-# If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
-    apt-get update && \
-    apt-get install -y libclblast-dev && \
-    apt-get clean \
-    ; fi
+# We need protoc installed, and the version in 22.04 is too old.  We will create one as part of installing the GRPC build below
+# but that will also bring in a newer version of absl which stablediffusion cannot compile with.  This version of protoc is only
+# here so that we can generate the grpc code for the stablediffusion build
+RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
+    unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
+    rm protoc.zip

 # stablediffusion does not tolerate a newer version of abseil, build it first
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build

-COPY --from=grpc /build/grpc ./grpc/
-
-WORKDIR /build/grpc/cmake/build
-RUN make install
+# Install the pre-built GRPC
+COPY --from=grpc /opt/grpc /usr/local

 # Rebuild with defaults backends
 WORKDIR /build
 RUN make build

 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
-    mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
-    touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
+        mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
+        touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
    ; fi

 ###################################
 ###################################

-FROM requirements-${IMAGE_TYPE}
+# This is the final target. The result of this target will be the image uploaded to the registry.
+# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
+FROM requirements-drivers

 ARG FFMPEG
 ARG BUILD_TYPE
 ARG TARGETARCH
 ARG IMAGE_TYPE=extras
+ARG EXTRA_BACKENDS
 ARG MAKEFLAGS

 ENV BUILD_TYPE=${BUILD_TYPE}
@@ -199,25 +248,16 @@ ARG CUDA_MAJOR_VERSION=11
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
-ENV PIP_CACHE_PURGE=true

 # Add FFmpeg
 RUN if [ "${FFMPEG}" = "true" ]; then \
-    apt-get install -y ffmpeg && apt-get clean \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            ffmpeg && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
    ; fi

-# Add OpenCL
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
-    apt-get update && \
-    apt-get install -y libclblast1 && \
-    apt-get clean \
-    ; fi
-
-RUN apt-get update && \
-    apt-get install -y cmake git  && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
 WORKDIR /build

 # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
@@ -227,9 +267,9 @@ WORKDIR /build
 COPY . .

 COPY --from=builder /build/sources ./sources/
-COPY --from=grpc /build/grpc ./grpc/
+COPY --from=grpc /opt/grpc /usr/local

-RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc
+RUN make prepare-sources

 # Copy the binary
 COPY --from=builder /build/local-ai ./
@@ -240,51 +280,58 @@ COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 # do not let stablediffusion rebuild (requires an older version of absl)
 COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion

-## Duplicated from Makefile to avoid having a big layer that's hard to push
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/autogptq \
+# Change the shell to bash so we can use [[ tests below
+SHELL ["/bin/bash", "-c"]
+# We try to strike a balance between individual layer size (as that affects total push time) and total image size
+# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
+# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
+
+RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/coqui \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/parler-tts \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/diffusers \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/transformers-musicgen \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/exllama \
    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/bark \
+
+RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/vall-e-x \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/petals \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/sentencetransformers \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/exllama2 \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/transformers \
    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/diffusers \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/vllm \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/mamba \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/sentencetransformers \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/rerankers \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/transformers \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/vall-e-x \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/exllama \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/exllama2 \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/petals \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/transformers-musicgen \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/parler-tts \
-    ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/coqui \
+
+RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/vllm \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/autogptq \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/bark \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/rerankers \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/mamba \
    ; fi

 # Make sure the models directory exists
--- a/86
+++ b/86
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=784e11dea1f5ce9638851b2b0dddb107e2a609c8
+CPPLLAMA_VERSION?=dc685be46622a8fabfd57cfa804237c8f15679b8

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
-WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418
+WHISPER_CPP_VERSION?=4ef8d9f44eb402c528ab6d990ab50a9f4f666347

 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
@@ -25,10 +25,10 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759

 # stablediffusion version
-STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
+STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f

 # tinydream version
-TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293
+TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057

 export BUILD_TYPE?=
 export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
@@ -38,7 +38,7 @@ CGO_LDFLAGS?=
 CGO_LDFLAGS_WHISPER?=
 CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
-BUILD_ID?=git
+BUILD_ID?=

 TEST_DIR=/tmp/test

@@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s)
 endif

 ifeq ($(OS),Darwin)
-	
+
 	ifeq ($(OSX_SIGNING_IDENTITY),)
 		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
 	endif
@@ -99,7 +99,7 @@ endif
 ifeq ($(BUILD_TYPE),cublas)
 	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
 	export LLAMA_CUBLAS=1
-	export WHISPER_CUBLAS=1
+	export WHISPER_CUDA=1
 	CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
 endif

@@ -152,9 +152,11 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 	OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif

-ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
+ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
 ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
 ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
@@ -240,7 +242,7 @@ sources/whisper.cpp:
 	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1

 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
-	cd sources/whisper.cpp && make libwhisper.a
+	cd sources/whisper.cpp && $(MAKE) libwhisper.a

 get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream

@@ -293,6 +295,7 @@ clean: ## Remove build related file
 	rm -rf backend-assets/*
 	$(MAKE) -C backend/cpp/grpc clean
 	$(MAKE) -C backend/cpp/llama clean
+	rm -rf backend/cpp/llama-* || true
 	$(MAKE) dropreplace
 	$(MAKE) protogen-clean
 	rmdir pkg/grpc/proto || true
@@ -311,14 +314,26 @@ build: prepare backend-assets grpcs ## Build the project
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

 build-minimal:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS=backend-assets/grpc/llama-cpp GO_TAGS=none $(MAKE) build
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build

 build-api:
 	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build

-dist: build
+dist:
+	STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
+ifeq ($(OS),Darwin)
+	$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
+else
+	$(MAKE) backend-assets/grpc/llama-cpp-cuda
+endif
+	$(MAKE) build
 	mkdir -p release
+# if BUILD_ID is empty, then we don't append it to the binary name
+ifeq ($(BUILD_ID),)
+	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
+else
 	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
+endif

 osx-signed: build
 	codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
@@ -616,8 +631,8 @@ backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/go
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/

-backend-assets/grpc/langchain-huggingface: backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
+backend-assets/grpc/huggingface: backend-assets/grpc
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/

 backend/cpp/llama/llama.cpp:
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -629,7 +644,7 @@ ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
 				 -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
 				 -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
 				 -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
-backend/cpp/llama/grpc-server:
+build-llama-cpp-grpc-server:
 # Conditionally build grpc for the llama backend to use if needed
 ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 	$(MAKE) -C backend/cpp/grpc build
@@ -638,19 +653,44 @@ ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
 	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) \
-	$(MAKE) -C backend/cpp/llama grpc-server
+	$(MAKE) -C backend/cpp/${VARIANT} grpc-server
 else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
-	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
 endif

-backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
-	cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
+backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-avx2
+	$(MAKE) -C backend/cpp/llama-avx2 purge
+	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
+
+backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-avx
+	$(MAKE) -C backend/cpp/llama-avx purge
+	$(info ${GREEN}I llama-cpp build info:avx${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
+
+backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-fallback
+	$(MAKE) -C backend/cpp/llama-fallback purge
+	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
-	cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
+	cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
 endif

+backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-cuda
+	$(MAKE) -C backend/cpp/llama-cuda purge
+	$(info ${GREEN}I llama-cpp build info:cuda${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
+
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
@@ -693,7 +733,7 @@ docker:
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
 		-t $(DOCKER_IMAGE) .
-	
+
 docker-aio:
 	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
 	docker build \
@@ -707,7 +747,7 @@ docker-aio-all:

 docker-image-intel:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -715,7 +755,7 @@ docker-image-intel:

 docker-image-intel-xpu:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--- a/README.md
+++ b/README.md
@@ -50,6 +50,7 @@

 [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

+- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
 - Reranker API: https://github.com/mudler/LocalAI/pull/2121
 - Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
 - llama3: https://github.com/mudler/LocalAI/discussions/2076
@@ -59,6 +60,8 @@
 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855

 Hot topics (looking for contributors):
+
+- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
 - Backends v2: https://github.com/mudler/LocalAI/issues/1126
 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
 - Assistant API: https://github.com/mudler/LocalAI/issues/1273
@@ -89,7 +92,8 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
+- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
+- 🆕 [Reranker API](https://localai.io/features/reranker/)

 ## 💻 Usage

@@ -110,6 +114,7 @@ Model galleries
 Other:
 - Helm chart https://github.com/go-skynet/helm-charts
 - VSCode extension https://github.com/badgooooor/localai-vscode-plugin
+- Terminal utility https://github.com/djcopley/ShellOracle
 - Local Smart assistant https://github.com/mudler/LocalAGI
 - Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
 - Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
@@ -128,7 +133,7 @@ Other:

 ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)

- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/ai/answers/tiZMDoZzZV6TLxgDXNBnFE/deploying-helm-charts-on-aws-eks)
+- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
 - [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
 - [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
 - [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,7 +1,7 @@
 name: gpt-4
 mmap: true
 parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
+  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf

 template:
  chat_message: |
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -1,7 +1,7 @@
 name: gpt-4
 mmap: true
 parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf

 template:
  chat_message: |
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -2,7 +2,7 @@ name: gpt-4
 mmap: false
 f16: false
 parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf

 template:
  chat_message: |
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -212,6 +212,9 @@ message ModelOptions {
  float YarnBetaSlow = 47;

  string Type = 49;
+
+  bool FlashAttention = 56;
+  bool NoKVOffload = 57;
 }

 message Result {
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -43,35 +43,27 @@ llama.cpp:

 llama.cpp/examples/grpc-server: llama.cpp
 	mkdir -p llama.cpp/examples/grpc-server
-	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
-	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
-	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
-## XXX: In some versions of CMake clip wasn't being built before llama.
-## This is an hack for now, but it should be fixed in the future.
-	cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
-	cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
-	echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
-	cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
-	cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
+	bash prepare.sh

 rebuild:
-	cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
+	bash prepare.sh
 	rm -rf grpc-server
 	$(MAKE) grpc-server

-clean:
-	rm -rf llama.cpp
+purge:
+	rm -rf llama.cpp/build
+	rm -rf llama.cpp/examples/grpc-server
 	rm -rf grpc-server

+clean: purge
+	rm -rf llama.cpp
+
 grpc-server: llama.cpp llama.cpp/examples/grpc-server
+	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	bash -c "source $(ONEAPI_VARS); \
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"	
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
 else
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
 endif
 	cp llama.cpp/build/bin/grpc-server .
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -2254,6 +2254,9 @@ static void params_parse(const backend::ModelOptions* request,
    }
    params.use_mlock = request->mlock();
    params.use_mmap = request->mmap();
+    params.flash_attn = request->flashattention();
+    params.no_kv_offload = request->nokvoffload();
+
    params.embedding = request->embeddings();

    if (request->ropescaling() == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
--- a/backend/cpp/llama/prepare.sh
+++ b/backend/cpp/llama/prepare.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
+cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
+cp -rfv json.hpp llama.cpp/examples/grpc-server/
+cp -rfv utils.hpp llama.cpp/examples/grpc-server/
+    
+if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
+    echo "grpc-server already added"
+else
+    echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
+fi
+
+## XXX: In some versions of CMake clip wasn't being built before llama.
+## This is a hack for now, but it should be fixed in the future.
+cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
+cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
+echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
+cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
+cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
--- a/backend/go/llm/langchain/langchain.go
+++ b/backend/go/llm/langchain/langchain.go
@@ -4,6 +4,7 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"fmt"
+	"os"

 	"github.com/go-skynet/LocalAI/pkg/grpc/base"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
@@ -18,9 +19,14 @@ type LLM struct {
 }

 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
+	var err error
+	hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
+	if hfToken == "" {
+		return fmt.Errorf("no huggingface token provided")
+	}
+	llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
 	llm.model = opts.Model
-	return nil
+	return err
 }

 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
--- a/backend/go/transcribe/transcript.go
+++ b/backend/go/transcribe/transcript.go
@@ -11,8 +11,8 @@ import (
 	"github.com/go-skynet/LocalAI/core/schema"
 )

-func runCommand(command []string) (string, error) {
-	cmd := exec.Command(command[0], command[1:]...)
+func ffmpegCommand(args []string) (string, error) {
+	cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
 	cmd.Env = os.Environ()
 	out, err := cmd.CombinedOutput()
 	return string(out), err
@@ -21,16 +21,16 @@ func runCommand(command []string) (string, error) {
 // AudioToWav converts audio to wav for transcribe.
 // TODO: use https://github.com/mccoyst/ogg?
 func audioToWav(src, dst string) error {
-    command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
-	out, err := runCommand(command)
+	commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
+	out, err := ffmpegCommand(commandArgs)
 	if err != nil {
 		return fmt.Errorf("error: %w out: %s", err, out)
 	}
 	return nil
 }

-func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
-	res := schema.Result{}
+func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
+	res := schema.TranscriptionResult{}

 	dir, err := os.MkdirTemp("", "whisper")
 	if err != nil {
--- a/backend/go/transcribe/whisper.go
+++ b/backend/go/transcribe/whisper.go
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
 	return err
 }

-func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
+func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
 	return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
 }
--- a/backend/python/autogptq/Makefile
+++ b/backend/python/autogptq/Makefile
@@ -1,6 +1,6 @@
 .PHONY: autogptq
 autogptq: protogen
-	$(MAKE) -C ../common-env/transformers
+	bash install.sh

 .PHONY: protogen
 protogen: backend_pb2_grpc.py backend_pb2.py
@@ -10,4 +10,8 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
--- a/backend/python/autogptq/autogptq.yml
+++ b/backend/python/autogptq/autogptq.yml
@@ -1,93 +0,0 @@
-####
-# Attention! This file is abandoned. 
-# Please use the ../common-env/transformers/transformers.yml file to manage dependencies.
-###
-name: autogptq
-channels:
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - accelerate==0.27.0
-      - aiohttp==3.8.5
-      - aiosignal==1.3.1
-      - async-timeout==4.0.3
-      - attrs==23.1.0
-      - auto-gptq==0.7.1
-      - certifi==2023.7.22
-      - charset-normalizer==3.3.0
-      - datasets==2.14.5
-      - dill==0.3.7
-      - filelock==3.12.4
-      - frozenlist==1.4.0
-      - fsspec==2023.6.0
-      - grpcio==1.59.0
-      - huggingface-hub==0.16.4
-      - idna==3.4
-      - jinja2==3.1.2
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - multidict==6.0.4
-      - multiprocess==0.70.15
-      - networkx==3.1
-      - numpy==1.26.0
-      - nvidia-cublas-cu12==12.1.3.1
-      - nvidia-cuda-cupti-cu12==12.1.105
-      - nvidia-cuda-nvrtc-cu12==12.1.105
-      - nvidia-cuda-runtime-cu12==12.1.105
-      - nvidia-cudnn-cu12==8.9.2.26
-      - nvidia-cufft-cu12==11.0.2.54
-      - nvidia-curand-cu12==10.3.2.106
-      - nvidia-cusolver-cu12==11.4.5.107
-      - nvidia-cusparse-cu12==12.1.0.106
-      - nvidia-nccl-cu12==2.18.1
-      - nvidia-nvjitlink-cu12==12.2.140
-      - nvidia-nvtx-cu12==12.1.105
-      - optimum==1.17.1
-      - packaging==23.2
-      - pandas==2.1.1
-      - peft==0.5.0
-      - protobuf==4.24.4
-      - psutil==5.9.5
-      - pyarrow==13.0.0
-      - python-dateutil==2.8.2
-      - pytz==2023.3.post1
-      - pyyaml==6.0.1
-      - regex==2023.10.3
-      - requests==2.31.0
-      - rouge==1.0.1
-      - safetensors>=0.3.3
-      - six==1.16.0
-      - sympy==1.12
-      - tokenizers==0.14.0
-      - tqdm==4.66.1
-      - torch==2.2.1
-      - torchvision==0.17.1
-      - transformers==4.34.0
-      - transformers_stream_generator==0.0.5
-      - triton==2.1.0
-      - typing-extensions==4.8.0
-      - tzdata==2023.3
-      - urllib3==2.0.6
-      - xxhash==3.4.1
-      - yarl==1.9.2
--- a/backend/python/autogptq/autogptq.py
+++ b/backend/python/autogptq/autogptq.py
--- a/backend/python/autogptq/install.sh
+++ b/backend/python/autogptq/install.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+# This is here because the Intel pip index is broken: it returns a 200 status code for every package name but no package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to keep falling through to the default PyPI index so that it can find optimum[openvino] there.
+# The --upgrade flag actually allows us to *downgrade* torch to the version provided in the Intel pip index.
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+fi
+
+installRequirements
--- a/backend/python/autogptq/requirements-intel.txt
+++ b/backend/python/autogptq/requirements-intel.txt
@@ -0,0 +1,4 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch
+torch
+optimum[openvino]
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -0,0 +1,7 @@
+accelerate
+auto-gptq==0.7.1
+grpcio==1.63.0
+protobuf
+torch
+certifi
+transformers
--- a/backend/python/autogptq/run.sh
+++ b/backend/python/autogptq/run.sh
@@ -1,14 +1,4 @@
 #!/bin/bash
+source $(dirname $0)/../common/libbackend.sh

-##
-## A bash script wrapper that runs the autogptq server with conda
-
-export PATH=$PATH:/opt/conda/bin
-
-# Activate conda environment
-source activate transformers
-
-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-python $DIR/autogptq.py $@
+startBackend "$@" # quoted so arguments containing whitespace or glob characters are forwarded unchanged
--- a/backend/python/autogptq/test.sh
+++ b/backend/python/autogptq/test.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+runUnittests
--- a/backend/python/bark/Makefile
+++ b/backend/python/bark/Makefile
@@ -1,6 +1,6 @@
 .PHONY: ttsbark
 ttsbark: protogen
-	$(MAKE) -C ../common-env/transformers
+	bash install.sh

 .PHONY: run
 run: protogen
@@ -22,4 +22,8 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
--- a/backend/python/bark/backend.py
+++ b/backend/python/bark/backend.py
--- a/backend/python/bark/install.sh
+++ b/backend/python/bark/install.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+# This is here because the Intel pip index is broken: it returns a 200 status code for every package name but no package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to keep falling through to the default PyPI index so that it can find optimum[openvino] there.
+# The --upgrade flag actually allows us to *downgrade* torch to the version provided in the Intel pip index.
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+fi
+
+installRequirements
--- a/backend/python/bark/requirements-intel.txt
+++ b/backend/python/bark/requirements-intel.txt
@@ -0,0 +1,5 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch
+torch
+torchaudio
+optimum[openvino]
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -0,0 +1,6 @@
+accelerate
+bark==0.1.5
+grpcio==1.63.0
+protobuf
+certifi
+transformers
--- a/backend/python/bark/run.sh
+++ b/backend/python/bark/run.sh
@@ -1,14 +1,4 @@
 #!/bin/bash
+source $(dirname $0)/../common/libbackend.sh

-##
-## A bash script wrapper that runs the ttsbark server with conda
-
-export PATH=$PATH:/opt/conda/bin
-
-# Activate conda environment
-source activate transformers
-
-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-python $DIR/ttsbark.py $@
+startBackend "$@" # quoted so arguments containing whitespace or glob characters are forwarded unchanged
--- a/backend/python/bark/test.py
+++ b/backend/python/bark/test.py
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
        """
        This method sets up the gRPC service by starting the server
        """
-        self.service = subprocess.Popen(["python3", "ttsbark.py", "--addr", "localhost:50051"])
+        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
--- a/backend/python/bark/test.sh
+++ b/backend/python/bark/test.sh
@@ -1,11 +1,6 @@
 #!/bin/bash
-##
-## A bash script wrapper that runs the bark server with conda
+set -e

-# Activate conda environment
-source activate transformers
+source $(dirname $0)/../common/libbackend.sh

-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-python -m unittest $DIR/test.py
+runUnittests
--- a/backend/python/common-env/transformers/Makefile
+++ b/backend/python/common-env/transformers/Makefile
@@ -1,21 +0,0 @@
-CONDA_ENV_PATH = "transformers.yml"
-
-ifeq ($(BUILD_TYPE), cublas)
-	CONDA_ENV_PATH = "transformers-nvidia.yml"
-endif
-
-ifeq ($(BUILD_TYPE), hipblas)
-	CONDA_ENV_PATH = "transformers-rocm.yml"
-endif
-
-# Intel GPU are supposed to have dependencies installed in the main python
-# environment, so we skip conda installation for SYCL builds.
-# https://github.com/intel/intel-extension-for-pytorch/issues/538
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-export SKIP_CONDA=1
-endif
-
-.PHONY: transformers
-transformers:
-	@echo "Installing $(CONDA_ENV_PATH)..."
-	bash install.sh $(CONDA_ENV_PATH)
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -1,44 +0,0 @@
-#!/bin/bash
-set -ex
-
-SKIP_CONDA=${SKIP_CONDA:-0}
-REQUIREMENTS_FILE=$1
-
-# Check if environment exist
-conda_env_exists(){
-    ! conda list --name "${@}" >/dev/null 2>/dev/null
-}
-
-if [ $SKIP_CONDA -eq 1 ]; then
-    echo "Skipping conda environment installation"
-else
-    export PATH=$PATH:/opt/conda/bin
-    if conda_env_exists "transformers" ; then
-        echo "Creating virtual environment..."
-        conda env create --name transformers --file $REQUIREMENTS_FILE
-        echo "Virtual environment created."
-    else 
-        echo "Virtual environment already exists."
-    fi
-fi
-
-if [ -d "/opt/intel" ]; then
-    # Intel GPU: If the directory exists, we assume we are using the intel image
-    # (no conda env)
-    # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
-fi
-
-# If we didn't skip conda, activate the environment
-# to install FlashAttention
-if [ $SKIP_CONDA -eq 0 ]; then
-    source activate transformers
-fi
-if [[ $REQUIREMENTS_FILE =~ -nvidia.yml$ ]]; then
-    #TODO: FlashAttention is supported on nvidia and ROCm, but ROCm install can't be done this easily
-    pip install flash-attn --no-build-isolation
-fi
-
-if [ "$PIP_CACHE_PURGE" = true ] ; then
-    pip cache purge
-fi
--- a/backend/python/common-env/transformers/transformers-nvidia.yml
+++ b/backend/python/common-env/transformers/transformers-nvidia.yml
@@ -1,125 +0,0 @@
-name: transformers
-channels:
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - accelerate==0.27.0
-      - aiohttp==3.8.5
-      - aiosignal==1.3.1
-      - async-timeout==4.0.3
-      - auto-gptq==0.7.1
-      - attrs==23.1.0
-      - bark==0.1.5
-      - bitsandbytes==0.43.0
-      - boto3==1.28.61
-      - botocore==1.31.61
-      - certifi==2023.7.22
-      - TTS==0.22.0
-      - charset-normalizer==3.3.0
-      - datasets==2.14.5
-      - sentence-transformers==2.5.1 # Updated Version
-      - sentencepiece==0.1.99
-      - dill==0.3.7
-      - einops==0.7.0
-      - encodec==0.1.1
-      - filelock==3.12.4
-      - frozenlist==1.4.0
-      - fsspec==2023.6.0
-      - funcy==2.0
-      - grpcio==1.59.0
-      - huggingface-hub
-      - idna==3.4
-      - jinja2==3.1.2
-      - jmespath==1.0.1
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - multidict==6.0.4
-      - multiprocess==0.70.15
-      - networkx
-      - numpy==1.26.0
-      - nvidia-cublas-cu12==12.1.3.1
-      - nvidia-cuda-cupti-cu12==12.1.105
-      - nvidia-cuda-nvrtc-cu12==12.1.105
-      - nvidia-cuda-runtime-cu12==12.1.105
-      - nvidia-cudnn-cu12==8.9.2.26
-      - nvidia-cufft-cu12==11.0.2.54
-      - nvidia-curand-cu12==10.3.2.106
-      - nvidia-cusolver-cu12==11.4.5.107
-      - nvidia-cusparse-cu12==12.1.0.106
-      - nvidia-nccl-cu12==2.18.1
-      - nvidia-nvjitlink-cu12==12.2.140
-      - nvidia-nvtx-cu12==12.1.105
-      - optimum==1.17.1
-      - packaging==23.2
-      - pandas
-      - peft==0.5.0
-      - protobuf==4.24.4
-      - psutil==5.9.5
-      - pyarrow==13.0.0
-      - python-dateutil==2.8.2
-      - pytz==2023.3.post1
-      - pyyaml==6.0.1
-      - regex==2023.10.3
-      - requests==2.31.0
-      - rouge==1.0.1
-      - s3transfer==0.7.0
-      - safetensors>=0.4.1
-      - scipy==1.12.0 # Updated Version
-      - six==1.16.0
-      - sympy==1.12
-      - tokenizers
-      - torch==2.1.2
-      - torchvision==0.16.2
-      - torchaudio==2.1.2
-      - tqdm==4.66.1
-      - triton==2.1.0
-      - typing-extensions==4.8.0
-      - tzdata==2023.3
-      - urllib3==1.26.17
-      - xxhash==3.4.1
-      - yarl==1.9.2
-      - soundfile
-      - langid
-      - wget
-      - unidecode
-      - pyopenjtalk-prebuilt
-      - pypinyin
-      - inflect
-      - cn2an
-      - jieba
-      - eng_to_ipa
-      - openai-whisper
-      - matplotlib
-      - gradio==3.41.2
-      - nltk
-      - sudachipy
-      - sudachidict_core
-      - vocos
-      - vllm>=0.4.0
-      - transformers>=4.38.2  # Updated Version
-      - transformers_stream_generator==0.0.5
-      - xformers==0.0.23.post1  
-      - rerankers[transformers]
-      - pydantic
-prefix: /opt/conda/envs/transformers
--- a/backend/python/common-env/transformers/transformers-rocm.yml
+++ b/backend/python/common-env/transformers/transformers-rocm.yml
@@ -1,113 +0,0 @@
-name: transformers
-channels:
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - --pre
-      - --extra-index-url https://download.pytorch.org/whl/nightly/
-      - accelerate==0.27.0
-      - auto-gptq==0.7.1
-      - aiohttp==3.8.5
-      - aiosignal==1.3.1
-      - async-timeout==4.0.3
-      - attrs==23.1.0
-      - bark==0.1.5
-      - boto3==1.28.61
-      - botocore==1.31.61
-      - certifi==2023.7.22
-      - TTS==0.22.0
-      - charset-normalizer==3.3.0
-      - datasets==2.14.5
-      - sentence-transformers==2.5.1 # Updated Version
-      - sentencepiece==0.1.99
-      - dill==0.3.7
-      - einops==0.7.0
-      - encodec==0.1.1
-      - filelock==3.12.4
-      - frozenlist==1.4.0
-      - fsspec==2023.6.0
-      - funcy==2.0
-      - grpcio==1.59.0
-      - huggingface-hub
-      - idna==3.4
-      - jinja2==3.1.2
-      - jmespath==1.0.1
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - multidict==6.0.4
-      - multiprocess==0.70.15
-      - networkx
-      - numpy==1.26.0
-      - packaging==23.2
-      - pandas
-      - peft==0.5.0
-      - protobuf==4.24.4
-      - psutil==5.9.5
-      - pyarrow==13.0.0
-      - python-dateutil==2.8.2
-      - pytz==2023.3.post1
-      - pyyaml==6.0.1
-      - regex==2023.10.3
-      - requests==2.31.0
-      - rouge==1.0.1
-      - s3transfer==0.7.0
-      - safetensors>=0.4.1
-      - scipy==1.12.0 # Updated Version
-      - six==1.16.0
-      - sympy==1.12
-      - tokenizers
-      - torch
-      - torchaudio
-      - tqdm==4.66.1
-      - triton==2.1.0
-      - typing-extensions==4.8.0
-      - tzdata==2023.3
-      - urllib3==1.26.17
-      - xxhash==3.4.1
-      - yarl==1.9.2
-      - soundfile
-      - langid
-      - wget
-      - unidecode
-      - optimum==1.17.1
-      - pyopenjtalk-prebuilt
-      - pypinyin
-      - inflect
-      - cn2an
-      - jieba
-      - eng_to_ipa
-      - openai-whisper
-      - matplotlib
-      - gradio==3.41.2
-      - nltk
-      - sudachipy
-      - sudachidict_core
-      - vocos
-      - vllm>=0.4.0
-      - transformers>=4.38.2  # Updated Version
-      - transformers_stream_generator==0.0.5
-      - xformers==0.0.23.post1
-      - rerankers[transformers]
-      - pydantic
-prefix: /opt/conda/envs/transformers
--- a/backend/python/common-env/transformers/transformers.yml
+++ b/backend/python/common-env/transformers/transformers.yml
@@ -1,117 +0,0 @@
-name: transformers
-channels:
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - accelerate==0.27.0
-      - aiohttp==3.8.5
-      - aiosignal==1.3.1
-      - auto-gptq==0.7.1
-      - async-timeout==4.0.3
-      - attrs==23.1.0
-      - bark==0.1.5
-      - boto3==1.28.61
-      - botocore==1.31.61
-      - certifi==2023.7.22
-      - coloredlogs==15.0.1
-      - TTS==0.22.0
-      - charset-normalizer==3.3.0
-      - datasets==2.14.5
-      - sentence-transformers==2.5.1 # Updated Version
-      - sentencepiece==0.1.99
-      - dill==0.3.7
-      - einops==0.7.0
-      - encodec==0.1.1
-      - filelock==3.12.4
-      - frozenlist==1.4.0
-      - fsspec==2023.6.0
-      - funcy==2.0
-      - grpcio==1.59.0
-      - huggingface-hub
-      - humanfriendly==10.0
-      - idna==3.4
-      - jinja2==3.1.2
-      - jmespath==1.0.1
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - multidict==6.0.4
-      - multiprocess==0.70.15
-      - networkx
-      - numpy==1.26.0
-      - onnx==1.15.0
-      - openvino==2024.0.0
-      - openvino-telemetry==2023.2.1
-      - optimum[openvino]==1.17.1
-      - packaging==23.2
-      - pandas
-      - peft==0.5.0
-      - protobuf==4.24.4
-      - psutil==5.9.5
-      - pyarrow==13.0.0
-      - python-dateutil==2.8.2
-      - pytz==2023.3.post1
-      - pyyaml==6.0.1
-      - regex==2023.10.3
-      - requests==2.31.0
-      - rouge==1.0.1
-      - s3transfer==0.7.0
-      - safetensors>=0.4.1
-      - scipy==1.12.0 # Updated Version
-      - six==1.16.0
-      - sympy==1.12
-      - tokenizers
-      - torch==2.1.2
-      - torchvision==0.16.2
-      - torchaudio==2.1.2
-      - tqdm==4.66.1
-      - triton==2.1.0
-      - typing-extensions==4.8.0
-      - tzdata==2023.3
-      - urllib3==1.26.17
-      - xxhash==3.4.1
-      - yarl==1.9.2
-      - soundfile
-      - langid
-      - wget
-      - unidecode
-      - pyopenjtalk-prebuilt
-      - pypinyin
-      - inflect
-      - cn2an
-      - jieba
-      - eng_to_ipa
-      - openai-whisper
-      - matplotlib
-      - gradio==3.41.2
-      - nltk
-      - sudachipy
-      - sudachidict_core
-      - vocos
-      - vllm>=0.4.0
-      - transformers>=4.38.2  # Updated Version
-      - transformers_stream_generator==0.0.5
-      - xformers==0.0.23.post1
-      - rerankers[transformers]
-      - pydantic
-prefix: /opt/conda/envs/transformers
--- a/backend/python/common/libbackend.sh
+++ b/backend/python/common/libbackend.sh
@@ -0,0 +1,213 @@
+
+
+# init handles the setup of the library
+# 
+# use the library by adding the following line to a script:
+# source $(dirname $0)/../common/libbackend.sh
+#
+# If you want to limit what targets a backend can be used on, set the variable LIMIT_TARGETS to a
+# space separated list of valid targets BEFORE sourcing the library, for example to only allow a backend
+# to be used on CUDA and CPU backends:
+#
+# LIMIT_TARGETS="cublas cpu"
+# source $(dirname $0)/../common/libbackend.sh
+#
+# You can use any valid BUILD_TYPE or BUILD_PROFILE, if you need to limit a backend to CUDA 12 only:
+#
+# LIMIT_TARGETS="cublas12"
+# source $(dirname $0)/../common/libbackend.sh
+#
+function init() {
+    # MY_DIR is the directory of the calling script; BACKEND_NAME is that directory's name
+    MY_DIR=$(realpath "$(dirname "$0")")
+    BACKEND_NAME=${MY_DIR##*/}
+    BUILD_PROFILE=$(getBuildProfile)
+    # If a backend has defined a list of valid build profiles...
+    if [ -n "${LIMIT_TARGETS}" ]; then
+        isValidTarget=$(checkTargets ${LIMIT_TARGETS}) # unquoted on purpose: LIMIT_TARGETS is a space separated list
+        if [ "${isValidTarget}" != true ]; then
+            echo "${BACKEND_NAME} can only be used on the following targets: ${LIMIT_TARGETS}"
+            exit 0 # exit status 0: an unsupported target is not reported as a failure
+        fi
+    fi
+
+    echo "Initializing libbackend for ${BACKEND_NAME}"
+}
+
+# getBuildProfile inspects the environment to determine which build profile is appropriate
+# and echoes one of the following:
+# - cublas${CUDA_MAJOR_VERSION} (ex: cublas11, cublas12), or plain cublas when CUDA_MAJOR_VERSION is not set
+# - intel, when /opt/intel exists and BUILD_TYPE is not cublas
+# - the value of BUILD_TYPE (ex: hipblas) for any other non-empty BUILD_TYPE
+# - cpu, when BUILD_TYPE is not set at all
+function getBuildProfile() {
+    # First check if we are a cublas build, and if so report the correct build profile
+    if [ "x${BUILD_TYPE}" == "xcublas" ]; then
+        if [ -n "${CUDA_MAJOR_VERSION}" ]; then
+            # If we have been given a CUDA version, we trust it
+            echo "${BUILD_TYPE}${CUDA_MAJOR_VERSION}"
+        else
+            # We don't know what version of cuda we are, so we report ourselves as a generic cublas
+            echo "${BUILD_TYPE}"
+        fi
+        return 0
+    fi
+
+    # If /opt/intel exists, then we are doing an intel/ARC build
+    if [ -d "/opt/intel" ]; then
+        echo "intel"
+        return 0
+    fi
+
+    # For any other value of BUILD_TYPE, we don't need any special handling/discovery
+    if [ -n "${BUILD_TYPE}" ]; then
+        echo "${BUILD_TYPE}"
+        return 0
+    fi
+
+    # If there is no BUILD_TYPE set at all, set a build-profile value of CPU, we aren't building for any GPU targets
+    echo "cpu"
+}
+
+# ensureVenv makes sure that the venv for the backend both exists, and is activated.
+# It creates ${MY_DIR}/venv with `uv venv` when missing and sources its activate script
+# unless VIRTUAL_ENV already points at it. This function is idempotent, so you can call it
+# as many times as you want and it will always result in an activated virtual environment
+function ensureVenv() {
+    if [ ! -d "${MY_DIR}/venv" ]; then
+        uv venv "${MY_DIR}/venv"
+        echo "virtualenv created"
+    fi
+
+    if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
+        source "${MY_DIR}/venv/bin/activate"
+        echo "virtualenv activated"
+    fi
+
+    echo "activated virtualenv has been ensured"
+}
+
+# installRequirements looks for several requirements files and if they exist runs the install for them in order
+#
+#  - requirements-install.txt
+#  - requirements.txt
+#  - requirements-${BUILD_TYPE}.txt
+#  - requirements-${BUILD_PROFILE}.txt
+#
+# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cublas11 or cublas12
+# it can also include some options that we do not have BUILD_TYPES for, ex: intel
+#
+# NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index.
+# you may want to add the following line to a requirements-intel.txt if you use one:
+#
+# --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+#
+# If you need to add extra flags into the pip install command you can do so by setting the variable EXTRA_PIP_INSTALL_FLAGS
+# before calling installRequirements.  For example:
+#
+# source $(dirname $0)/../common/libbackend.sh
+# EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
+# installRequirements
+function installRequirements() {
+    ensureVenv
+
+    # These are the requirements files we will attempt to install, in order
+    declare -a requirementFiles=(
+        "${MY_DIR}/requirements-install.txt"
+        "${MY_DIR}/requirements.txt"
+        "${MY_DIR}/requirements-${BUILD_TYPE}.txt"
+    )
+    # Only add the profile specific file when it differs from the BUILD_TYPE one, so it is not listed twice
+    if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
+        requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
+    fi
+    # EXTRA_PIP_INSTALL_FLAGS is left unquoted below so that it can carry several space separated flags
+    for reqFile in "${requirementFiles[@]}"; do
+        if [ -f "${reqFile}" ]; then
+            echo "starting requirements install for ${reqFile}"
+            uv pip install ${EXTRA_PIP_INSTALL_FLAGS} --requirement "${reqFile}"
+            echo "finished requirements install for ${reqFile}"
+        fi
+    done
+}
+
+# startBackend discovers and runs the backend GRPC server, forwarding all of its arguments to it
+#
+# You can specify a specific backend file to execute by setting BACKEND_FILE before calling startBackend.
+# example:
+#
+# source ../common/libbackend.sh
+# BACKEND_FILE="${MY_DIR}/source/backend.py"
+# startBackend "$@"
+#
+# valid filenames for autodiscovered backend servers (looked up in ${MY_DIR}, first match wins; nothing runs if none exist):
+#  - server.py
+#  - backend.py
+#  - ${BACKEND_NAME}.py
+function startBackend() {
+    ensureVenv
+
+    if [ -n "${BACKEND_FILE}" ]; then
+        python "${BACKEND_FILE}" "$@"
+    elif [ -e "${MY_DIR}/server.py" ]; then
+        python "${MY_DIR}/server.py" "$@"
+    elif [ -e "${MY_DIR}/backend.py" ]; then
+        python "${MY_DIR}/backend.py" "$@"
+    elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
+        python "${MY_DIR}/${BACKEND_NAME}.py" "$@"
+    fi
+}
+
+# runUnittests discovers and runs python unittests
+#
+# You can specify a specific test file to use by setting TEST_FILE before calling runUnittests.
+# example:
+#
+# source ../common/libbackend.sh
+# TEST_FILE="${MY_DIR}/source/test.py"
+# runUnittests
+#
+# by default a file named test.py in the backend's directory will be used
+function runUnittests() {
+    ensureVenv
+    # Tests are run from the directory holding the test file (pushd/popd around python -m unittest)
+    if [ -n "${TEST_FILE}" ]; then
+        testDir=$(dirname "$(realpath "${TEST_FILE}")")
+        testFile=$(basename "${TEST_FILE}")
+        pushd "${testDir}"
+        python -m unittest "${testFile}"
+        popd
+    elif [ -f "${MY_DIR}/test.py" ]; then
+        pushd "${MY_DIR}"
+        python -m unittest test.py
+        popd
+    else
+        echo "no tests defined for ${BACKEND_NAME}"
+    fi
+}
+
+##################################################################################
+# Below here are helper functions not intended to be used outside of the library #
+##################################################################################
+
+# checkTargets reports whether the current BUILD_TYPE or BUILD_PROFILE appears in the given list of targets
+function checkTargets() {
+    # Prints "true" when one of the arguments equals BUILD_TYPE or BUILD_PROFILE,
+    # otherwise prints "false". The result is echoed (not returned) so callers
+    # capture it with $(checkTargets ...).
+    local candidate
+    local verdict=false
+
+    # $@ is deliberately unquoted: every argument is re-split on whitespace into single targets
+    for candidate in $@; do
+        case "${candidate}" in
+            "${BUILD_TYPE}"|"${BUILD_PROFILE}")
+                verdict=true
+                break
+                ;;
+        esac
+    done
+    echo ${verdict}
+}
+
+init
--- a/backend/python/common/template/Makefile
+++ b/backend/python/common/template/Makefile
@@ -0,0 +1,19 @@
+.DEFAULT_GOAL := install
+
+.PHONY: install
+install: protogen
+	bash install.sh
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
--- a/backend/python/common/template/backend.py
+++ b/backend/python/common/template/backend.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+import grpc
+import backend_pb2
+import backend_pb2_grpc
--- a/backend/python/common/template/install.sh
+++ b/backend/python/common/template/install.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
+# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+fi
+
+installRequirements
--- a/backend/python/common/template/requirements-intel.txt
+++ b/backend/python/common/template/requirements-intel.txt
@@ -0,0 +1,4 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch
+torch
+optimum[openvino]
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -0,0 +1,2 @@
+grpcio==1.63.0
+protobuf
--- a/backend/python/common/template/run.sh
+++ b/backend/python/common/template/run.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+source $(dirname $0)/../common/libbackend.sh
+
+startBackend $@
--- a/backend/python/common/template/test.sh
+++ b/backend/python/common/template/test.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+runUnittests
--- a/backend/python/coqui/Makefile
+++ b/backend/python/coqui/Makefile
@@ -1,6 +1,6 @@
 .PHONY: coqui
 coqui: protogen
-	$(MAKE) -C ../common-env/transformers
+	bash install.sh

 .PHONY: run
 run: protogen
@@ -22,4 +22,8 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
--- a/backend/python/coqui/coqui_server.py
+++ b/backend/python/coqui/coqui_server.py
--- a/backend/python/coqui/install.sh
+++ b/backend/python/coqui/install.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
+# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+fi
+
+installRequirements
--- a/backend/python/coqui/requirements-intel.txt
+++ b/backend/python/coqui/requirements-intel.txt
@@ -0,0 +1,5 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch
+torch
+torchaudio
+optimum[openvino]
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -0,0 +1,6 @@
+accelerate
+TTS==0.22.0
+grpcio==1.63.0
+protobuf
+certifi
+transformers
--- a/backend/python/coqui/run.sh
+++ b/backend/python/coqui/run.sh
@@ -1,14 +1,4 @@
 #!/bin/bash
+source $(dirname $0)/../common/libbackend.sh

-##
-## A bash script wrapper that runs the ttsbark server with conda
-
-export PATH=$PATH:/opt/conda/bin
-
-# Activate conda environment
-source activate transformers
-
-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-python $DIR/coqui_server.py $@
+startBackend $@
--- a/backend/python/coqui/test.py
+++ b/backend/python/coqui/test.py
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
        """
        This method sets up the gRPC service by starting the server
        """
-        self.service = subprocess.Popen(["python3", "coqui_server.py", "--addr", "localhost:50051"])
+        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
--- a/backend/python/coqui/test.sh
+++ b/backend/python/coqui/test.sh
@@ -1,11 +1,6 @@
 #!/bin/bash
-##
-## A bash script wrapper that runs the bark server with conda
+set -e

-# Activate conda environment
-source activate transformers
+source $(dirname $0)/../common/libbackend.sh

-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-python -m unittest $DIR/test.py
+runUnittests
--- a/backend/python/diffusers/Makefile
+++ b/backend/python/diffusers/Makefile
@@ -13,8 +13,7 @@ endif

 .PHONY: diffusers
 diffusers: protogen
-	@echo "Installing $(CONDA_ENV_PATH)..."
-	bash install.sh $(CONDA_ENV_PATH)
+	bash install.sh

 .PHONY: run
 run: protogen
@@ -33,4 +32,8 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
--- a/backend/python/diffusers/backend_diffusers.py
+++ b/backend/python/diffusers/backend_diffusers.py
--- a/backend/python/diffusers/diffusers-rocm.yml
+++ b/backend/python/diffusers/diffusers-rocm.yml
@@ -1,65 +0,0 @@
-name: diffusers
-channels:
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - tzdata=2023c=h04d1e81_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - --pre
-      - --extra-index-url https://download.pytorch.org/whl/nightly/
-      - accelerate>=0.11.0
-      - certifi==2023.7.22
-      - charset-normalizer==3.3.0
-      - compel==2.0.2
-      - diffusers==0.24.0
-      - filelock==3.12.4
-      - fsspec==2023.9.2
-      - grpcio==1.59.0
-      - huggingface-hub>=0.19.4
-      - idna==3.4
-      - importlib-metadata==6.8.0
-      - jinja2==3.1.2
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - networkx==3.1
-      - numpy==1.26.0
-      - omegaconf
-      - packaging==23.2
-      - pillow==10.0.1
-      - protobuf==4.24.4
-      - psutil==5.9.5
-      - pyparsing==3.1.1
-      - pyyaml==6.0.1
-      - regex==2023.10.3
-      - requests==2.31.0
-      - safetensors==0.4.0
-      - sympy==1.12
-      - tqdm==4.66.1
-      - transformers>=4.25.1
-      - triton==2.1.0
-      - typing-extensions==4.8.0
-      - urllib3==2.0.6
-      - zipp==3.17.0
-      - torch
-      - opencv-python
-prefix: /opt/conda/envs/diffusers
--- a/backend/python/diffusers/diffusers.yml
+++ b/backend/python/diffusers/diffusers.yml
@@ -1,75 +0,0 @@
-name: diffusers
-channels:
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - tzdata=2023c=h04d1e81_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - accelerate>=0.11.0
-      - certifi==2023.7.22
-      - charset-normalizer==3.3.0
-      - compel==2.0.2
-      - diffusers==0.24.0
-      - filelock==3.12.4
-      - fsspec==2023.9.2
-      - grpcio==1.59.0
-      - huggingface-hub>=0.19.4
-      - idna==3.4
-      - importlib-metadata==6.8.0
-      - jinja2==3.1.2
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - networkx==3.1
-      - numpy==1.26.0
-      - nvidia-cublas-cu12==12.1.3.1
-      - nvidia-cuda-cupti-cu12==12.1.105
-      - nvidia-cuda-nvrtc-cu12==12.1.105
-      - nvidia-cuda-runtime-cu12==12.1.105
-      - nvidia-cudnn-cu12==8.9.2.26
-      - nvidia-cufft-cu12==11.0.2.54
-      - nvidia-curand-cu12==10.3.2.106
-      - nvidia-cusolver-cu12==11.4.5.107
-      - nvidia-cusparse-cu12==12.1.0.106
-      - nvidia-nccl-cu12==2.18.1
-      - nvidia-nvjitlink-cu12==12.2.140
-      - nvidia-nvtx-cu12==12.1.105
-      - omegaconf
-      - packaging==23.2
-      - pillow==10.0.1
-      - protobuf==4.24.4
-      - psutil==5.9.5
-      - pyparsing==3.1.1
-      - pyyaml==6.0.1
-      - regex==2023.10.3
-      - requests==2.31.0
-      - safetensors==0.4.0
-      - sympy==1.12
-      - torch==2.1.0
-      - tqdm==4.66.1
-      - transformers>=4.25.1
-      - triton==2.1.0
-      - typing-extensions==4.8.0
-      - urllib3==2.0.6
-      - zipp==3.17.0
-      - opencv-python
-prefix: /opt/conda/envs/diffusers
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -1,50 +1,14 @@
 #!/bin/bash
-set -ex
+set -e

-SKIP_CONDA=${SKIP_CONDA:-0}
+source $(dirname $0)/../common/libbackend.sh

-# Check if environment exist
-conda_env_exists(){
-    ! conda list --name "${@}" >/dev/null 2>/dev/null
-}
-
-if [ $SKIP_CONDA -eq 1 ]; then
-    echo "Skipping conda environment installation"
-else
-    export PATH=$PATH:/opt/conda/bin
-    if conda_env_exists "diffusers" ; then
-        echo "Creating virtual environment..."
-        conda env create --name diffusers --file $1
-        echo "Virtual environment created."
-    else 
-        echo "Virtual environment already exists."
-    fi
+# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
+# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 fi

-if [ -d "/opt/intel" ]; then
-    # Intel GPU: If the directory exists, we assume we are using the Intel image
-    # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install torch==2.1.0a0 \
-                torchvision==0.16.0a0 \
-                torchaudio==2.1.0a0 \
-                intel-extension-for-pytorch==2.1.10+xpu \
-                --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-    
-    pip install google-api-python-client \
-                grpcio \
-                grpcio-tools \
-                diffusers==0.24.0 \
-                transformers>=4.25.1 \
-                accelerate \
-                compel==2.0.2 \
-                Pillow
-fi
-
-if [ "$PIP_CACHE_PURGE" = true ] ; then
-    if [ $SKIP_CONDA -ne 1 ]; then
-        # Activate conda environment
-        source activate diffusers
-    fi
-
-    pip cache purge
-fi
+installRequirements
--- a/backend/python/diffusers/requirements-intel.txt
+++ b/backend/python/diffusers/requirements-intel.txt
@@ -0,0 +1,5 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch
+torch
+torchvision
+optimum[openvino]
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -0,0 +1,10 @@
+accelerate
+compel
+diffusers
+grpcio==1.63.0
+opencv-python
+pillow
+protobuf
+torch
+transformers
+certifi
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -1,19 +1,4 @@
 #!/bin/bash
+source $(dirname $0)/../common/libbackend.sh

-##
-## A bash script wrapper that runs the diffusers server with conda
-
-if [ -d "/opt/intel" ]; then
-    # Assumes we are using the Intel oneAPI container image
-    # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    export XPU=1
-else
-    export PATH=$PATH:/opt/conda/bin
-    # Activate conda environment
-    source activate diffusers
-fi
-
-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-python $DIR/backend_diffusers.py $@
+startBackend $@
--- a/backend/python/diffusers/test.py
+++ b/backend/python/diffusers/test.py
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
        """
        This method sets up the gRPC service by starting the server
        """
-        self.service = subprocess.Popen(["python3", "backend_diffusers.py", "--addr", "localhost:50051"])
+        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])

    def tearDown(self) -> None:
        """
--- a/backend/python/diffusers/test.sh
+++ b/backend/python/diffusers/test.sh
@@ -1,14 +1,6 @@
 #!/bin/bash
+set -e

-##
-## A bash script wrapper that runs the diffusers server with conda
+source $(dirname $0)/../common/libbackend.sh

-export PATH=$PATH:/opt/conda/bin
-
-# Activate conda environment
-source activate diffusers
-
-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-python -m unittest $DIR/test.py
+runUnittests
--- a/backend/python/exllama/.gitignore
+++ b/backend/python/exllama/.gitignore
@@ -0,0 +1 @@
+source
--- a/backend/python/exllama/Makefile
+++ b/backend/python/exllama/Makefile
@@ -18,4 +18,8 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	$(RM) -r venv source __pycache__
--- a/backend/python/exllama/backend.py
+++ b/backend/python/exllama/backend.py
@@ -14,9 +14,9 @@ import torch
 import torch.nn.functional as F
 from torch import version as torch_version

-from tokenizer import ExLlamaTokenizer
-from generator import ExLlamaGenerator
-from model import ExLlama, ExLlamaCache, ExLlamaConfig
+from source.tokenizer import ExLlamaTokenizer
+from source.generator import ExLlamaGenerator
+from source.model import ExLlama, ExLlamaCache, ExLlamaConfig

 _ONE_DAY_IN_SECONDS = 60 * 60 * 24

--- a/backend/python/exllama/exllama.yml
+++ b/backend/python/exllama/exllama.yml
@@ -1,56 +0,0 @@
-name: exllama
-channels:
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - tzdata=2023c=h04d1e81_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - filelock==3.12.4
-      - fsspec==2023.9.2
-      - grpcio==1.59.0
-      - jinja2==3.1.2
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - networkx==3.1
-      - ninja==1.11.1
-      - protobuf==4.24.4
-      - nvidia-cublas-cu12==12.1.3.1
-      - nvidia-cuda-cupti-cu12==12.1.105
-      - nvidia-cuda-nvrtc-cu12==12.1.105
-      - nvidia-cuda-runtime-cu12==12.1.105
-      - nvidia-cudnn-cu12==8.9.2.26
-      - nvidia-cufft-cu12==11.0.2.54
-      - nvidia-curand-cu12==10.3.2.106
-      - nvidia-cusolver-cu12==11.4.5.107
-      - nvidia-cusparse-cu12==12.1.0.106
-      - nvidia-nccl-cu12==2.18.1
-      - nvidia-nvjitlink-cu12==12.2.140
-      - nvidia-nvtx-cu12==12.1.105
-      - safetensors==0.3.2
-      - sentencepiece==0.1.99
-      - sympy==1.12
-      - torch==2.1.0
-      - triton==2.1.0
-      - typing-extensions==4.8.0
-      - numpy
-prefix: /opt/conda/envs/exllama
--- a/backend/python/exllama/install.sh
+++ b/backend/python/exllama/install.sh
@@ -1,32 +1,13 @@
 #!/bin/bash
-set -ex
+set -e

-export PATH=$PATH:/opt/conda/bin
+LIMIT_TARGETS="cublas"

-if [ "$BUILD_TYPE" != "cublas" ]; then
-    echo "[exllama] Attention!!! Nvidia GPU is required - skipping installation"
-    exit 0
-fi
+source $(dirname $0)/../common/libbackend.sh

-# Check if environment exist
-conda_env_exists(){
-    ! conda list --name "${@}" >/dev/null 2>/dev/null
-}
+installRequirements

-if conda_env_exists "exllama" ; then
-    echo "Creating virtual environment..."
-    conda env create --name exllama --file $1
-    echo "Virtual environment created."
-else
-    echo "Virtual environment already exists."
-fi
+[ -d "${MY_DIR}/source" ] || git clone https://github.com/turboderp/exllama "${MY_DIR}/source" # skip the clone on re-runs: cloning into an existing directory fails
+uv pip install ${BUILD_ISOLATION_FLAG} --requirement "${MY_DIR}/source/requirements.txt" # NOTE(review): BUILD_ISOLATION_FLAG is not set anywhere in view; EXTRA_PIP_INSTALL_FLAGS may have been intended - confirm

-source activate exllama
-
-git clone https://github.com/turboderp/exllama $CONDA_PREFIX/exllama && pushd $CONDA_PREFIX/exllama && pip install -r requirements.txt && popd
-
-cp -rfv $CONDA_PREFIX/exllama/* ./
-
-if [ "$PIP_CACHE_PURGE" = true ] ; then
-    pip cache purge
-fi
+cp -v ./*py $MY_DIR/source/
--- a/backend/python/exllama/requirements.txt
+++ b/backend/python/exllama/requirements.txt
@@ -0,0 +1,6 @@
+grpcio==1.63.0
+protobuf
+torch
+transformers
+certifi
+setuptools
--- a/backend/python/exllama/run.sh
+++ b/backend/python/exllama/run.sh
@@ -1,15 +1,7 @@
 #!/bin/bash
+LIMIT_TARGETS="cublas"
+BACKEND_FILE="${MY_DIR}/source/backend.py"

-##
-## A bash script wrapper that runs the exllama server with conda
-export PATH=$PATH:/opt/conda/bin
+source $(dirname $0)/../common/libbackend.sh

-# Activate conda environment
-source activate exllama
-
-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-cd $DIR
-
-python $DIR/exllama.py $@
+startBackend $@
--- a/backend/python/exllama/test.sh
+++ b/backend/python/exllama/test.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+runUnittests
--- a/backend/python/exllama2/.gitignore
+++ b/backend/python/exllama2/.gitignore
@@ -0,0 +1 @@
+source
--- a/backend/python/exllama2/Makefile
+++ b/backend/python/exllama2/Makefile
@@ -1,6 +1,5 @@
 .PHONY: exllama2
 exllama2: protogen
-	$(MAKE) -C ../common-env/transformers
 	bash install.sh

 .PHONY: run
@@ -17,4 +16,8 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	$(RM) -r venv source __pycache__
--- a/backend/python/exllama2/exllama2_backend.py
+++ b/backend/python/exllama2/exllama2_backend.py
--- a/backend/python/exllama2/exllama2.yml
+++ b/backend/python/exllama2/exllama2.yml
@@ -1,57 +0,0 @@
-name: exllama2
-channels:
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - tzdata=2023c=h04d1e81_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - filelock==3.12.4
-      - fsspec==2023.9.2
-      - grpcio==1.59.0
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - networkx==3.1
-      - protobuf==4.24.4
-      - nvidia-cublas-cu12==12.1.3.1
-      - nvidia-cuda-cupti-cu12==12.1.105
-      - nvidia-cuda-nvrtc-cu12==12.1.105
-      - nvidia-cuda-runtime-cu12==12.1.105
-      - nvidia-cudnn-cu12==8.9.2.26
-      - nvidia-cufft-cu12==11.0.2.54
-      - nvidia-curand-cu12==10.3.2.106
-      - nvidia-cusolver-cu12==11.4.5.107
-      - nvidia-cusparse-cu12==12.1.0.106
-      - nvidia-nccl-cu12==2.18.1
-      - nvidia-nvjitlink-cu12==12.2.140
-      - nvidia-nvtx-cu12==12.1.105
-      - pandas
-      - numpy
-      - ninja
-      - fastparquet
-      - torch>=2.1.0
-      - safetensors>=0.3.2
-      - sentencepiece>=0.1.97
-      - pygments
-      - websockets
-      - regex
-prefix: /opt/conda/envs/exllama2
--- a/backend/python/exllama2/install.sh
+++ b/backend/python/exllama2/install.sh
@@ -1,32 +1,16 @@
 #!/bin/bash
 set -e
-##
-## A bash script installs the required dependencies of VALL-E-X and prepares the environment
-export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f

-if [ "$BUILD_TYPE" != "cublas" ]; then
-    echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation"
-    exit 0
-fi
+LIMIT_TARGETS="cublas"
+EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
+EXLLAMA2_VERSION=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f

-export PATH=$PATH:/opt/conda/bin
-source activate transformers
+source $(dirname $0)/../common/libbackend.sh

-echo $CONDA_PREFIX
+installRequirements

-git clone https://github.com/turboderp/exllamav2 $CONDA_PREFIX/exllamav2
+git clone https://github.com/turboderp/exllamav2 $MY_DIR/source
+pushd ${MY_DIR}/source && git checkout -b build ${EXLLAMA2_VERSION} && popd

-pushd $CONDA_PREFIX/exllamav2
-
-git checkout -b build $SHA
-
-# TODO: this needs to be pinned within the conda environments
-pip install -r requirements.txt
-
-popd
-
-cp -rfv $CONDA_PREFIX/exllamav2/* ./  
-
-if [ "$PIP_CACHE_PURGE" = true ] ; then
-    pip cache purge
-fi
+# This installs exllamav2 in JIT mode so it will compile the appropriate torch extension at runtime
+EXLLAMA_NOCOMPILE= uv pip install ${EXTRA_PIP_INSTALL_FLAGS} ${MY_DIR}/source/
--- a/backend/python/exllama2/requirements-install.txt
+++ b/backend/python/exllama2/requirements-install.txt
@@ -0,0 +1,4 @@
+# This is here to trigger the install script to add --no-build-isolation to the uv pip install commands
+# exllama2 does not specify its build requirements per PEP517, so we need to provide some things ourselves
+wheel
+setuptools
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -0,0 +1,7 @@
+accelerate
+grpcio==1.63.0
+protobuf
+certifi
+torch
+wheel
+setuptools
--- a/backend/python/exllama2/run.sh
+++ b/backend/python/exllama2/run.sh
@@ -1,16 +1,6 @@
 #!/bin/bash
+LIMIT_TARGETS="cublas"

-##
-## A bash script wrapper that runs the exllama server with conda
+source $(dirname $0)/../common/libbackend.sh

-export PATH=$PATH:/opt/conda/bin
-
-# Activate conda environment
-source activate transformers
-
-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-cd $DIR
-
-python $DIR/exllama2_backend.py $@
+startBackend $@
--- a/backend/python/exllama2/test.sh
+++ b/backend/python/exllama2/test.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+runUnittests
--- a/backend/python/mamba/Makefile
+++ b/backend/python/mamba/Makefile
@@ -1,7 +1,6 @@
 .PHONY: mamba
 mamba: protogen
-	$(MAKE) -C ../common-env/transformers
-	bash install.sh
+	bash install.sh 

 .PHONY: run
 run: protogen
@@ -23,4 +22,8 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	$(RM) -r venv __pycache__
--- a/backend/python/mamba/backend_mamba.py
+++ b/backend/python/mamba/backend_mamba.py
--- a/backend/python/mamba/install.sh
+++ b/backend/python/mamba/install.sh
@@ -1,22 +1,9 @@
 #!/bin/bash
 set -e
-##
-## A bash script installs the required dependencies of VALL-E-X and prepares the environment

-if [ "$BUILD_TYPE" != "cublas" ]; then
-    echo "[mamba] Attention!!! nvcc is required - skipping installation"
-    exit 0
-fi
+LIMIT_TARGETS="cublas"
+EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"

-export PATH=$PATH:/opt/conda/bin
+source $(dirname $0)/../common/libbackend.sh

-# Activate conda environment
-source activate transformers
-
-echo $CONDA_PREFIX
-
-pip install causal-conv1d==1.0.0 mamba-ssm==1.0.1
-
-if [ "$PIP_CACHE_PURGE" = true ] ; then
-    pip cache purge
-fi
+installRequirements
--- a/backend/python/mamba/requirements-install.txt
+++ b/backend/python/mamba/requirements-install.txt
@@ -0,0 +1,7 @@
+# mamba does not specify its build dependencies per PEP517, so we need to disable build isolation
+# this also means that we need to install the basic build dependencies into the venv ourselves
+# https://github.com/Dao-AILab/causal-conv1d/issues/24
+packaging
+setuptools
+wheel
+torch==2.2.0
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -0,0 +1,6 @@
+causal-conv1d==1.2.0.post2
+mamba-ssm==1.2.0.post1
+grpcio==1.63.0
+protobuf
+certifi
+transformers
--- a/backend/python/mamba/run.sh
+++ b/backend/python/mamba/run.sh
@@ -1,14 +1,6 @@
 #!/bin/bash
+LIMIT_TARGETS="cublas"

-##
-## A bash script wrapper that runs the diffusers server with conda
+source $(dirname $0)/../common/libbackend.sh

-export PATH=$PATH:/opt/conda/bin
-
-# Activate conda environment
-source activate transformers
-
-# get the directory where the bash script is located
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-python $DIR/backend_mamba.py $@
+startBackend $@
--- a/backend/python/mamba/test_backend_mamba.py
+++ b/backend/python/mamba/test_backend_mamba.py
@@ -20,7 +20,7 @@ class TestBackendServicer(unittest.TestCase):
    This class contains methods to test the startup and shutdown of the gRPC service.
    """
    def setUp(self):
-        self.service = subprocess.Popen(["python", "backend_vllm.py", "--addr", "localhost:50051"])
+        self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
--- a/Show More
+++ b/Show More