Compare commits


1 Commit

Author SHA1 Message Date
Ettore Di Giacinto
18f22d7170 fix(ci-gmock): try again by injecting gmock in CMakefiles
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-10 23:04:19 +02:00
202 changed files with 2071 additions and 7857 deletions
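
The headline fix in this commit shows up in the "Build and Release" workflow hunk further down: instead of patching abseil with sed, the build step now appends a googletest FetchContent stanza to grpc's CMakeLists.txt (and to third_party/abseil-cpp/CMakeLists.txt) through a series of echo commands. Assembled, the injected CMake block amounts to roughly the following sketch, reconstructed from those echo lines (the googletest archive URL is the one pinned in the diff):

    # Injected at the end of grpc's CMakeLists.txt so gmock/gtest are fetched at configure time
    include(FetchContent)
    FetchContent_Declare(
      googletest
      URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
    )
    FetchContent_MakeAvailable(googletest)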

.github/bump_deps.sh vendored (13 lines changed)

@@ -6,17 +6,4 @@ VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
# Read $VAR from Makefile (only first match)
set +e
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
set -e
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
if [ -z "$CURRENT_COMMIT" ]; then
echo "Could not find $VAR in Makefile."
exit 0
fi
echo "Updated $VAR from $CURRENT_COMMIT to $LAST_COMMIT." > "$REPO_message.txt"
echo "https://github.com/$REPO/compare/$CURRENT_COMMIT..$LAST_COMMIT" >> "$REPO_message.txt"


@@ -4,7 +4,7 @@ import requests
import sys
import os
uri = sys.argv[1]
uri = sys.argv[0]
file_name = uri.split('/')[-1]
# Function to parse the URI and determine download method
@@ -36,7 +36,6 @@ def manual_safety_check_hf(repo_id):
download_type, repo_id_or_url = parse_uri(uri)
new_checksum = None
file_path = None
# Decide download method based on URI type
if download_type == 'huggingface':


@@ -14,10 +14,10 @@ function check_and_update_checksum() {
idx="$5"
# Download the file and calculate new checksum using Python
new_checksum=$(python3 ./.github/check_and_update.py $uri)
new_checksum=$(python3 ./check_and_update.py $uri)
result=$?
if [[ $result -eq 5 ]]; then
if [[ result -eq 5]]; then
echo "Contaminated entry detected, deleting entry for $model_name..."
yq eval -i "del([$idx])" "$input_yaml"
return

.github/dependabot.yml vendored (112 lines changed)

@@ -1,10 +1,6 @@
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "gitsubmodule"
directory: "/"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/"
schedule:
@@ -27,111 +23,3 @@ updates:
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/autogptq"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/bark"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/common/template"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/coqui"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/diffusers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama2"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/mamba"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/openvoice"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/parler-tts"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/petals"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/rerankers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/sentencetransformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers-musicgen"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vall-e-x"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vllm"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/chainlit"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/functions"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain/langchainpy-localai-example"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain-chroma"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/streamlit-bot"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/k8sgpt"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/kubernetes"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/langchain"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/examples/semantic-todo"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/telegram-bot"
schedule:
interval: "weekly"


@@ -9,6 +9,9 @@ jobs:
fail-fast: false
matrix:
include:
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
@@ -27,6 +30,9 @@ jobs:
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main"
- repository: "nomic-ai/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
- repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION"
branch: "master"
@@ -40,23 +46,17 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
id: bump
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
{
echo 'message<<EOF'
cat "${{ matrix.repository }}_message.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: ${{ steps.bump.outputs.message }}
body: Bump of ${{ matrix.repository }} version
signoff: true


@@ -22,7 +22,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
title: ':arrow_up: Update docs version ${{ matrix.repository }}'
branch: "update/docs"
body: Bump of ${{ matrix.repository }} version inside docs
signoff: true


@@ -20,12 +20,12 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y pip wget
sudo pip install --upgrade pip
sudo pip install --upgrade pip
pip install huggingface_hub
- name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.1.1
with:
version: 'v4.44.2'
version: 'v4.43.1'
download-compressed: true
force: true
@@ -41,7 +41,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'chore(model-gallery): :arrow_up: update checksum'
title: 'models(gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true


@@ -14,7 +14,7 @@ jobs:
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@v2.2.0
uses: dependabot/fetch-metadata@v2.1.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true


@@ -1,83 +0,0 @@
name: Comment PRs
on:
pull_request_target:
jobs:
comment-pr:
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: "${{ github.event.pull_request.merge_commit_sha }}"
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
base_branch: ${{ github.event.pull_request.base.sha }}
- name: Show diff
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
run: |
cat $DIFF
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: mshick/add-pr-comment@v2
if: always()
with:
repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
message: ${{ steps.summarize.outputs.message }}
message-failure: |
Uh oh! Could not analyze this PR, maybe it's too big?


@@ -75,7 +75,7 @@ jobs:
uses: actions/checkout@v4
- name: Cache GRPC
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -84,11 +84,11 @@ jobs:
build-args: |
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
GRPC_VERSION=v1.64.0
context: .
file: ./Dockerfile
cache-to: type=gha,ignore-error=true
cache-from: type=gha
target: grpc
platforms: ${{ matrix.platforms }}
push: false
push: false


@@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
- base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
@@ -46,7 +46,7 @@ jobs:
uses: actions/checkout@v4
- name: Cache Intel images
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |


@@ -35,19 +35,18 @@ jobs:
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
matrix:
include:
# This is basically covered by the AIO test
# - build-type: ''
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-ffmpeg'
# ffmpeg: 'true'
# image-type: 'extras'
# runs-on: 'arc-runner-set'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -56,85 +55,85 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'hipblas'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-hipblas'
# ffmpeg: 'false'
# image-type: 'extras'
# base-image: "rocm/dev-ubuntu-22.04:6.1"
# grpc-base-image: "ubuntu:22.04"
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'sycl_f16'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg'
# ffmpeg: 'true'
# image-type: 'extras'
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# core-image-build:
# uses: ./.github/workflows/image_build.yml
# with:
# tag-latest: ${{ matrix.tag-latest }}
# tag-suffix: ${{ matrix.tag-suffix }}
# ffmpeg: ${{ matrix.ffmpeg }}
# image-type: ${{ matrix.image-type }}
# build-type: ${{ matrix.build-type }}
# cuda-major-version: ${{ matrix.cuda-major-version }}
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
# platforms: ${{ matrix.platforms }}
# runs-on: ${{ matrix.runs-on }}
# base-image: ${{ matrix.base-image }}
# grpc-base-image: ${{ matrix.grpc-base-image }}
# makeflags: ${{ matrix.makeflags }}
# secrets:
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
# strategy:
# matrix:
# include:
# - build-type: ''
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'sycl_f16'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
# grpc-base-image: "ubuntu:22.04"
# tag-suffix: 'sycl-f16-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'arc-runner-set'
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "0"
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
# - build-type: 'vulkan'
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-vulkan-ffmpeg-core'
# ffmpeg: 'true'
# image-type: 'core'
# runs-on: 'ubuntu-latest'
# base-image: "ubuntu:22.04"
# makeflags: "--jobs=4 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"


@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'


@@ -215,7 +215,7 @@ jobs:
password: ${{ secrets.quayPassword }}
- name: Build and push
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
if: github.event_name != 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
@@ -232,7 +232,7 @@ jobs:
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
GRPC_VERSION=v1.64.0
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile
@@ -243,7 +243,7 @@ jobs:
labels: ${{ steps.meta.outputs.labels }}
### Start testing image
- name: Build and push
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
if: github.event_name == 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
@@ -260,7 +260,7 @@ jobs:
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
GRPC_VERSION=v1.64.0
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile
@@ -276,7 +276,7 @@ jobs:
## End testing image
- name: Build and push AIO image
if: inputs.aio != ''
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
@@ -291,7 +291,7 @@ jobs:
- name: Build and push AIO image (dockerhub)
if: inputs.aio != ''
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |


@@ -14,10 +14,12 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:


@@ -12,9 +12,11 @@ jobs:
RELEASE_TITLE: ${{ github.event.release.name }}
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
steps:
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
- name: Summarize
id: summarize
run: |


@@ -1,28 +0,0 @@
name: Check PR style
on:
pull_request_target:
types:
- opened
- reopened
- edited
- synchronize
jobs:
title-lint:
runs-on: ubuntu-latest
permissions:
statuses: write
steps:
- uses: aslafy-z/conventional-pr-title-action@v3
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# check-pr-description:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v2
# - uses: jadrol/pr-description-checker-action@v1.0.0
# id: description-checker
# with:
# repo-token: ${{ secrets.GITHUB_TOKEN }}
# exempt-labels: no qa


@@ -1,15 +1,11 @@
name: Build and Release
on:
push:
branches:
- master
tags:
- 'v*'
pull_request:
- push
- pull_request
env:
GRPC_VERSION: v1.65.0
GRPC_VERSION: v1.64.0
permissions:
contents: write
@@ -31,10 +27,11 @@ jobs:
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
- name: Install CUDA Dependencies
run: |
@@ -54,9 +51,22 @@ jobs:
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc
GRPC_DIR=$PWD/grpc/
echo "include(FetchContent)" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_Declare(" >> $GRPC_DIR/CMakeLists.txt
echo " googletest" >> $GRPC_DIR/CMakeLists.txt
echo " URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip" >> $GRPC_DIR/CMakeLists.txt
echo ")" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_MakeAvailable(googletest)" >> $GRPC_DIR/CMakeLists.txt
GRPC_DIR=$PWD/grpc/third_party/abseil-cpp
echo "include(FetchContent)" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_Declare(" >> $GRPC_DIR/CMakeLists.txt
echo " googletest" >> $GRPC_DIR/CMakeLists.txt
echo " URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip" >> $GRPC_DIR/CMakeLists.txt
echo ")" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_MakeAvailable(googletest)" >> $GRPC_DIR/CMakeLists.txt
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
@@ -100,8 +110,8 @@ jobs:
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
@@ -150,7 +160,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -164,7 +174,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
env:
CUDA_VERSION: 12-5
CUDA_VERSION: 12-3
- name: "Install Hipblas"
env:
ROCM_VERSION: "6.1"
@@ -200,8 +210,7 @@ jobs:
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
@@ -211,12 +220,13 @@ jobs:
- name: Build
id: build
run: |
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
GRPC_DIR=$PWD/backend/cpp/llama
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
make -j4 dist
@@ -251,9 +261,9 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build stablediffusion
run: |
export PATH=$PATH:$GOPATH/bin
@@ -328,8 +338,8 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build
id: build
run: |


@@ -19,7 +19,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -29,8 +29,8 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test transformers
run: |
make --jobs=5 --output-sync=target -C backend/python/transformers
@@ -41,7 +41,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -51,8 +51,8 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test sentencetransformers
run: |
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
@@ -64,7 +64,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -74,7 +74,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test rerankers
run: |
@@ -86,7 +86,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -96,7 +96,7 @@ jobs:
sudo apt-get install -y libopencv-dev
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test diffusers
run: |
make --jobs=5 --output-sync=target -C backend/python/diffusers
@@ -107,7 +107,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -117,19 +117,19 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test parler-tts
run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
tests-openvoice:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -139,7 +139,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test openvoice
run: |
@@ -151,7 +151,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -161,7 +161,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test transformers-musicgen
run: |
@@ -175,7 +175,7 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# with:
# submodules: true
# - name: Dependencies
# run: |
@@ -185,14 +185,14 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# pip install --user grpcio-tools==1.64.0
# - name: Test petals
# run: |
# make --jobs=5 --output-sync=target -C backend/python/petals
# make --jobs=5 --output-sync=target -C backend/python/petals test
# tests-bark:
# runs-on: ubuntu-latest
@@ -239,7 +239,7 @@ jobs:
# df -h
# - name: Clone
# uses: actions/checkout@v4
# with:
# with:
# submodules: true
# - name: Dependencies
# run: |
@@ -249,14 +249,14 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# pip install --user grpcio-tools==1.64.0
# - name: Test bark
# run: |
# make --jobs=5 --output-sync=target -C backend/python/bark
# make --jobs=5 --output-sync=target -C backend/python/bark test
# Below tests needs GPU. Commented out for now
# TODO: Re-enable as soon as we have GPU nodes
# tests-vllm:
@@ -264,7 +264,7 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# with:
# submodules: true
# - name: Dependencies
# run: |
@@ -274,7 +274,7 @@ jobs:
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# pip install --user grpcio-tools==1.64.0
# - name: Test vllm
# run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm
@@ -284,7 +284,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -294,7 +294,7 @@ jobs:
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test vall-e-x
run: |
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
@@ -305,7 +305,7 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Dependencies
run: |
@@ -314,8 +314,8 @@ jobs:
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test coqui
run: |
make --jobs=5 --output-sync=target -C backend/python/coqui
make --jobs=5 --output-sync=target -C backend/python/coqui test
make --jobs=5 --output-sync=target -C backend/python/coqui test


@@ -10,7 +10,7 @@ on:
- '*'
env:
GRPC_VERSION: v1.65.0
GRPC_VERSION: v1.64.0
concurrency:
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -70,7 +70,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
sudo apt-get install build-essential curl ffmpeg
sudo apt-get install -y libgmock-dev
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
@@ -94,8 +94,8 @@ jobs:
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
export CUDACXX=/usr/local/cuda/bin/nvcc
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
# The python3-grpc-tools package in 22.04 is too old
pip install --user grpcio-tools
@@ -110,7 +110,7 @@ jobs:
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
env:
CUDA_VERSION: 12-4
CUDA_VERSION: 12-3
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -121,8 +121,7 @@ jobs:
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
cmake -DgRPC_INSTALL=ON \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5
- name: Install gRPC
@@ -215,7 +214,7 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
pip install --user --no-cache-dir grpcio-tools==1.64.1
pip install --user grpcio-tools==1.64.0
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include


@@ -13,17 +13,11 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version: 'stable'
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install protobuf-compiler
- run: |
go install github.com/swaggo/swag/cmd/swag@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Bump swagger 🔧
run: |
make protogen-go swagger
make swagger
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:


@@ -8,7 +8,7 @@ FROM ${BASE_IMAGE} AS requirements-core
USER root
ARG GO_VERSION=1.22.5
ARG GO_VERSION=1.22.4
ARG TARGETARCH
ARG TARGETVARIANT
@@ -24,7 +24,7 @@ RUN apt-get update && \
cmake \
curl \
git \
unzip upx-ucl && \
unzip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG CUDA_MINOR_VERSION=4
ENV BUILD_TYPE=${BUILD_TYPE}
@@ -108,11 +108,11 @@ RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -124,13 +124,33 @@ RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
@@ -142,9 +162,8 @@ RUN <<EOT bash
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
rm -rf /var/lib/apt/lists/* \
; fi
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
@@ -187,7 +206,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0
ARG GRPC_VERSION=v1.64.2
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -208,7 +227,6 @@ RUN apt-get update && \
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
mkdir -p /build/grpc/cmake/build && \
cd /build/grpc/cmake/build && \
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
make && \
make install && \

Makefile (155 lines changed)

@@ -6,9 +6,8 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=0d6fb52be0c1b7e77eb855f3adc4952771c8ce4c
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=fd560fe680c72fd0a0af2bc8881add20ad919071
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -19,23 +18,18 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=fe36c909715e6751277ddb020e7892c7670b61d4
WHISPER_CPP_VERSION?=1c31f9d4a8936aec550e6c4dc9ca5cae3b4f304a
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
# stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
# tinydream version
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
export BUILD_TYPE?=
@@ -58,7 +52,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=-s -w
LD_FLAGS?=
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
@@ -72,14 +66,6 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
UPX?=
# check if upx exists
ifeq (, $(shell which upx))
UPX=
else
UPX=$(shell which upx)
endif
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
@@ -216,109 +202,69 @@ all: help
## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a
## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
cd sources/go-llama.cpp && \
git init && \
git remote add origin $(GOLLAMA_REPO) && \
git fetch origin && \
git checkout $(GOLLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## go-piper
sources/go-piper:
mkdir -p sources/go-piper
cd sources/go-piper && \
git init && \
git remote add origin $(PIPER_REPO) && \
git fetch origin && \
git checkout $(PIPER_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## GPT4ALL
sources/gpt4all:
mkdir -p sources/gpt4all
cd sources/gpt4all && \
git init && \
git remote add origin $(GPT4ALL_REPO) && \
git fetch origin && \
git checkout $(GPT4ALL_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV
sources/go-rwkv.cpp:
mkdir -p sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && \
git init && \
git remote add origin $(RWKV_REPO) && \
git fetch origin && \
git checkout $(RWKV_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion
sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion
cd sources/go-stable-diffusion && \
git init && \
git remote add origin $(STABLEDIFFUSION_REPO) && \
git fetch origin && \
git checkout $(STABLEDIFFUSION_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## tiny-dream
sources/go-tiny-dream:
mkdir -p sources/go-tiny-dream
cd sources/go-tiny-dream && \
git init && \
git remote add origin $(TINYDREAM_REPO) && \
git fetch origin && \
git checkout $(TINYDREAM_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
$(MAKE) -C sources/go-tiny-dream libtinydream.a
## whisper
sources/whisper.cpp:
mkdir -p sources/whisper.cpp
cd sources/whisper.cpp && \
git init && \
git remote add origin $(WHISPER_REPO) && \
git fetch origin && \
git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@@ -385,7 +331,6 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -393,7 +338,7 @@ endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
@@ -430,7 +375,7 @@ else
endif
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
@@ -480,7 +425,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -742,22 +687,13 @@ backend-assets/grpc: protogen-go replace
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/gpt4all
endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/huggingface
endif
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -785,28 +721,28 @@ else
endif
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cpp
$(MAKE) -C backend/cpp/llama-cpp purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx2
$(MAKE) -C backend/cpp/llama-avx2 purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx
$(MAKE) -C backend/cpp/llama-avx purge
$(info ${GREEN}I llama-cpp build info:avx${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-fallback
$(MAKE) -C backend/cpp/llama-fallback purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
@@ -817,35 +753,35 @@ ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cuda
$(MAKE) -C backend/cpp/llama-cuda purge
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
@@ -859,50 +795,29 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/tinydream
endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
endif
backend-assets/grpc/local-store: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/local-store
endif
grpcs: prepare $(GRPC_BACKENDS)
@@ -944,7 +859,7 @@ docker-aio-all:
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -952,7 +867,7 @@ docker-image-intel:
docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \


@@ -72,26 +72,23 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
- 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
Hot topics (looking for contributors):
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
@@ -137,7 +134,6 @@ Other:
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Github Actions: https://github.com/marketplace/actions/start-localai
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
@@ -151,7 +147,6 @@ Other:
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)


@@ -46,14 +46,9 @@ endif
$(INSTALLED_PACKAGES): grpc_build
$(GRPC_REPO):
mkdir -p $(GRPC_REPO)/grpc
cd $(GRPC_REPO)/grpc && \
git init && \
git remote add origin $(GIT_REPO_LIB_GRPC) && \
git fetch origin && \
git checkout $(TAG_LIB_GRPC) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
$(GRPC_BUILD): $(GRPC_REPO)
mkdir -p $(GRPC_BUILD)
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install


@@ -22,6 +22,15 @@ set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
# Set correct Homebrew install folder for Apple Silicon and Intel Macs
if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
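A minimal sketch of how the googletest/gmock made available above via FetchContent could be consumed. The grpc_server_tests target and test_grpc_server.cpp file are hypothetical names used only for illustration and are not part of this diff; the GTest::gmock and GTest::gtest_main aliases are assumed to be provided by the fetched googletest revision when built in-tree.

```cmake
# Sketch only: mirrors the FetchContent injection shown above and adds a
# hypothetical consumer target to show how gmock would be linked.
cmake_minimum_required(VERSION 3.14)
project(grpc_server_tests_sketch CXX)

include(FetchContent)
FetchContent_Declare(
  googletest
  URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)

# Hypothetical test executable; the source file name is illustrative.
add_executable(grpc_server_tests test_grpc_server.cpp)
target_link_libraries(grpc_server_tests PRIVATE GTest::gmock GTest::gtest_main)
```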


@@ -1,6 +1,5 @@
LLAMA_VERSION?=
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=
BUILD_TYPE?=
@@ -46,13 +45,11 @@ ifeq ($(BUILD_TYPE),sycl_f32)
endif
llama.cpp:
mkdir -p llama.cpp
cd llama.cpp && \
git init && \
git remote add origin $(LLAMA_REPO) && \
git fetch origin && \
git checkout -b build $(LLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
if [ -z "$(LLAMA_VERSION)" ]; then \
exit 1; \
fi
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
llama.cpp/examples/grpc-server: llama.cpp
mkdir -p llama.cpp/examples/grpc-server


@@ -2108,7 +2108,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["grammar"] = predict->grammar();
data["prompt"] = predict->prompt();
data["ignore_eos"] = predict->ignoreeos();
data["embeddings"] = predict->embeddings();
// for each image in the request, add the image data
//
@@ -2259,6 +2258,7 @@ static void params_parse(const backend::ModelOptions* request,
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
params.lora_base = model_dir + "/"+request->lorabase();
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();
@@ -2385,31 +2385,6 @@ public:
return grpc::Status::OK;
}
/// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
json data = parse_options(false, request, llama);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
// get the result
task_result result = llama.queue_results.recv(task_id);
//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
llama.queue_results.remove_waiting_task_id(task_id);
if (!result.error && result.stop) {
std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
// loop the vector and set the embeddings results
for (int i = 0; i < embeddings.size(); i++) {
embeddingResult->add_embeddings(embeddings[i]);
}
}
else
{
return grpc::Status::OK;
}
return grpc::Status::OK;
}
};
void RunServer(const std::string& server_address) {


@@ -6,9 +6,9 @@ import (
"fmt"
"path/filepath"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type LLM struct {


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,6 +1,7 @@
accelerate
auto-gptq==0.7.1
grpcio==1.65.4
grpcio==1.64.0
protobuf
torch
certifi
transformers


@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio


@@ -1,2 +0,0 @@
torch
torchaudio


@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,6 +1,6 @@
accelerate
bark==0.1.5
grpcio==1.65.4
grpcio==1.64.0
protobuf
certifi
transformers


@@ -122,13 +122,6 @@ function installRequirements() {
requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
fi
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
if [ "x${BUILD_TYPE}" == "x" ]; then
requirementFiles+=("${MY_DIR}/requirements-cpu.txt")
fi
requirementFiles+=("${MY_DIR}/requirements-after.txt")
for reqFile in ${requirementFiles[@]}; do
if [ -f ${reqFile} ]; then
echo "starting requirements install for ${reqFile}"


@@ -1,2 +1,2 @@
grpcio==1.65.4
grpcio==1.64.0
protobuf


@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio


@@ -1,2 +0,0 @@
torch
torchaudio


@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,6 +1,6 @@
accelerate
TTS==0.22.0
grpcio==1.65.4
grpcio==1.64.0
protobuf
certifi
transformers


@@ -1,6 +1,6 @@
#!/usr/bin/env python3
from concurrent import futures
import traceback
import argparse
from collections import defaultdict
from enum import Enum
@@ -17,39 +17,35 @@ import backend_pb2_grpc
import grpc
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
from diffusers.utils import load_image,export_to_video
from compel import Compel, ReturnedEmbeddingsType
from transformers import CLIPTextModel
from safetensors.torch import load_file
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
COMPEL = os.environ.get("COMPEL", "0") == "1"
XPU = os.environ.get("XPU", "0") == "1"
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE = os.environ.get("CHUNK_SIZE", "8")
FPS = os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES = os.environ.get("FRAMES", "64")
COMPEL=os.environ.get("COMPEL", "0") == "1"
XPU=os.environ.get("XPU", "0") == "1"
CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
FPS=os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES=os.environ.get("FRAMES", "64")
if XPU:
import intel_extension_for_pytorch as ipex
print(ipex.xpu.get_device_name(0))
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287
def sc(self, clip_input, images): return images, [False for i in images]
def sc(self, clip_input, images) : return images, [False for i in images]
# edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values
safety_checker.StableDiffusionSafetyChecker.forward = sc
@@ -66,8 +62,6 @@ from diffusers.schedulers import (
PNDMScheduler,
UniPCMultistepScheduler,
)
# The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
# Credits to https://github.com/neggles
# See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
@@ -142,12 +136,10 @@ def get_scheduler(name: str, config: dict = {}):
return sched_class.from_config(config)
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
try:
print(f"Loading model {request.Model}...", file=sys.stderr)
@@ -157,7 +149,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.F16Memory:
torchType = torch.float16
variant = "fp16"
variant="fp16"
local = False
modelFile = request.Model
@@ -165,38 +157,38 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.cfg_scale = 7
if request.CFGScale != 0:
self.cfg_scale = request.CFGScale
clipmodel = "runwayml/stable-diffusion-v1-5"
if request.CLIPModel != "":
clipmodel = request.CLIPModel
clipsubfolder = "text_encoder"
if request.CLIPSubfolder != "":
clipsubfolder = request.CLIPSubfolder
# Check if ModelFile exists
if request.ModelFile != "":
if os.path.exists(request.ModelFile):
local = True
modelFile = request.ModelFile
fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
self.img2vid = False
self.txt2vid = False
self.img2vid=False
self.txt2vid=False
## img2img
if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
if fromSingleFile:
self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
torch_dtype=torchType)
torch_dtype=torchType)
else:
self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
## img2vid
elif request.PipelineType == "StableVideoDiffusionPipeline":
self.img2vid = True
self.img2vid=True
self.pipe = StableVideoDiffusionPipeline.from_pretrained(
request.Model, torch_dtype=torchType, variant=variant
)
@@ -205,63 +197,64 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
## text2img
elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
torch_dtype=torchType,
use_safetensors=SAFETENSORS,
variant=variant)
torch_dtype=torchType,
use_safetensors=SAFETENSORS,
variant=variant)
elif request.PipelineType == "StableDiffusionPipeline":
if fromSingleFile:
self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
torch_dtype=torchType)
torch_dtype=torchType)
else:
self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
elif request.PipelineType == "DiffusionPipeline":
self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
elif request.PipelineType == "VideoDiffusionPipeline":
self.txt2vid = True
self.txt2vid=True
self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType)
torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionXLPipeline":
if fromSingleFile:
self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = StableDiffusionXLPipeline.from_pretrained(
request.Model,
torch_dtype=torchType,
use_safetensors=True,
request.Model,
torch_dtype=torchType,
use_safetensors=True,
variant=variant)
elif request.PipelineType == "StableDiffusion3Pipeline":
if fromSingleFile:
self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = StableDiffusion3Pipeline.from_pretrained(
request.Model,
torch_dtype=torchType,
use_safetensors=True,
request.Model,
torch_dtype=torchType,
use_safetensors=True,
variant=variant)
if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip
else:
self.clip_skip = 0
# torch_dtype needs to be customized. float16 for GPU, float32 for CPU
# TODO: this needs to be customized
if request.SchedulerType != "":
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
if COMPEL:
self.compel = Compel(
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True]
)
)
if request.ControlNet:
self.controlnet = ControlNetModel.from_pretrained(
@@ -270,6 +263,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.controlnet = self.controlnet
else:
self.controlnet = None
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
# Assume directory from request.ModelFile.
# Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
@@ -282,17 +282,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file )
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
self.pipe.load_lora_weights(request.LoraAdapter)
self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
else:
self.pipe.unet.load_attn_procs(request.LoraAdapter)
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service
@@ -365,9 +358,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of values for the parameters
options = {
"negative_prompt": request.negative_prompt,
"width": request.width,
"height": request.height,
"negative_prompt": request.negative_prompt,
"width": request.width,
"height": request.height,
"num_inference_steps": steps,
}
@@ -379,7 +372,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
options["image"] = pose_image
if CLIPSKIP and self.clip_skip != 0:
options["clip_skip"] = self.clip_skip
options["clip_skip"]=self.clip_skip
# Get the keys that we will build the args for our pipe for
keys = options.keys()
@@ -423,21 +416,20 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
image = self.pipe(
guidance_scale=self.cfg_scale,
**kwargs
).images[0]
).images[0]
else:
# pass the kwargs dictionary to the self.pipe method
image = self.pipe(
prompt,
guidance_scale=self.cfg_scale,
**kwargs
).images[0]
).images[0]
# save the result
image.save(request.dst)
return backend_pb2.Result(message="Media generated", success=True)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
@@ -461,7 +453,6 @@ def serve(address):
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
@@ -469,4 +460,4 @@ if __name__ == "__main__":
)
args = parser.parse_args()
serve(args.addr)
serve(args.addr)


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchvision
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,12 +1,11 @@
setuptools
accelerate
compel
peft
diffusers
grpcio==1.65.4
grpcio==1.64.0
opencv-python
pillow
protobuf
sentencepiece
torch
transformers
certifi


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -1,5 +1,6 @@
grpcio==1.65.0
grpcio==1.64.0
protobuf
torch
transformers
certifi
setuptools


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -1,6 +1,7 @@
accelerate
grpcio==1.65.4
grpcio==1.64.0
protobuf
certifi
torch
wheel
setuptools


@@ -1,2 +0,0 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2


@@ -1 +0,0 @@
torch


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -3,4 +3,5 @@
# https://github.com/Dao-AILab/causal-conv1d/issues/24
packaging
setuptools
wheel
wheel
torch==2.2.0


@@ -1,4 +1,6 @@
grpcio==1.65.1
causal-conv1d==1.2.0.post2
mamba-ssm==1.2.0.post1
grpcio==1.64.0
protobuf
certifi
transformers


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -2,22 +2,22 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.65.4
grpcio==1.64.0
protobuf
librosa==0.9.1
faster-whisper==1.0.3
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
numpy==1.26.4
numpy==1.22.0
eng_to_ipa==0.0.2
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.15.4
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
gradio==4.38.1
gradio==3.48.0
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/MeloTTS.git


@@ -1,20 +1,20 @@
grpcio==1.65.4
grpcio==1.64.0
protobuf
librosa
faster-whisper
librosa==0.9.1
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
numpy
numpy==1.22.0
eng_to_ipa==0.0.2
inflect
unidecode
whisper-timestamped
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
gradio
gradio==3.48.0
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
git+https://github.com/myshell-ai/OpenVoice.git


@@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
unzip checkpoints_v2.zip
fi


@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio


@@ -1,2 +0,0 @@
torch
torchaudio


@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,6 +1,7 @@
accelerate
grpcio==1.65.1
grpcio==1.64.0
protobuf
torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
certifi
transformers


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,6 +1,6 @@
accelerate
rerankers[transformers]
grpcio==1.65.4
grpcio==1.64.0
protobuf
certifi
transformers


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -1,6 +1,6 @@
accelerate
sentence-transformers==3.0.1
sentence-transformers==2.5.1
transformers
grpcio==1.65.1
grpcio==1.64.0
protobuf
certifi


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -1,6 +1,7 @@
accelerate
transformers
grpcio==1.65.4
grpcio==1.64.0
protobuf
scipy==1.14.0
torch
scipy==1.13.0
certifi


@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@@ -1 +0,0 @@
torch


@@ -2,3 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,7 +1,8 @@
accelerate
transformers
grpcio==1.65.4
grpcio==1.64.0
protobuf
torch
certifi
intel-extension-for-transformers
bitsandbytes


@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio


@@ -1,2 +0,0 @@
torch
torchaudio


@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,4 +1,4 @@
accelerate
grpcio==1.65.4
grpcio==1.64.0
protobuf
certifi


@@ -0,0 +1 @@
flash-attn


@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
flash-attn


@@ -1,2 +0,0 @@
torch
flash-attn


@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406


@@ -1,6 +1,6 @@
accelerate
vllm
grpcio==1.65.4
grpcio==1.64.0
protobuf
certifi
transformers


@@ -91,7 +91,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: *c.Embeddings,
Embeddings: c.Embeddings,
LowVRAM: *c.LowVRAM,
NGPULayers: int32(*c.NGPULayers),
MMap: *c.MMap,


@@ -2,20 +2,129 @@ package cli
import (
"context"
"errors"
"fmt"
"io"
"net"
"time"
"math/rand/v2"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/types"
"github.com/rs/zerolog/log"
)
type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
}
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)
n, err := p2p.NewNode(f.Peer2PeerToken)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(context.Background())
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
return fs.Start(context.Background())
if err := p2p.ServiceDiscoverer(context.Background(), n, f.Peer2PeerToken, p2p.FederatedID, nil); err != nil {
return err
}
return Proxy(context.Background(), n, f.Address, p2p.FederatedID)
}
func Proxy(ctx context.Context, node *node.Node, listenAddr, service string) error {
log.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
// Open local port for listening
l, err := net.Listen("tcp", listenAddr)
if err != nil {
log.Error().Err(err).Msg("Error listening")
return err
}
// ll.Info("Binding local port on", srcaddr)
ledger, _ := node.Ledger()
// Announce ourselves so nodes accepts our connection
ledger.Announce(
ctx,
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
// }
},
)
defer l.Close()
for {
select {
case <-ctx.Done():
return errors.New("context canceled")
default:
log.Debug().Msg("New for connection")
// Listen for an incoming connection.
conn, err := l.Accept()
if err != nil {
fmt.Println("Error accepting: ", err.Error())
continue
}
// Handle connections in a new goroutine, forwarding to the p2p service
go func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.FederatedID) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
// open a TCP stream to one of the tunnels
// chosen randomly
// TODO: optimize this and track usage
tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
tunnelConn, err := net.Dial("tcp", tunnelAddr)
if err != nil {
log.Error().Err(err).Msg("Error connecting to tunnel")
return
}
log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
closer := make(chan struct{}, 2)
go copyStream(closer, tunnelConn, conn)
go copyStream(closer, conn, tunnelConn)
<-closer
tunnelConn.Close()
conn.Close()
// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
}()
}
}
}
func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
io.Copy(dst, src)
}


@@ -83,9 +83,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
return err
}
modelURI := downloader.URI(modelName)
if !modelURI.LooksLikeOCI() {
if !downloader.LooksLikeOCI(modelName) {
model := gallery.FindModel(models, modelName, mi.ModelsPath)
if model == nil {
log.Error().Str("model", modelName).Msg("model not found")


@@ -119,7 +119,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
}
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) {
if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes("") {
if v.IsOnline() {


@@ -86,8 +86,8 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
var errs error = nil
for _, uri := range hfscmd.ToScan {
log.Info().Str("uri", uri).Msg("scanning specific uri")
scanResults, err := downloader.HuggingFaceScan(downloader.URI(uri))
if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
scanResults, err := downloader.HuggingFaceScan(uri)
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!")
errs = errors.Join(errs, err)
}
