Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-07 05:04:29 -05:00)

Compare commits: fix_aarch6...debug_nvid (186 commits)
Commit list: 186 commits in this range, 7643719a80 through 8d046de287 (author, date, and message columns omitted).
.github/check_and_update.py (vendored, new file, 80 lines)
@@ -0,0 +1,80 @@
import hashlib
from huggingface_hub import hf_hub_download, get_paths_info
import requests
import sys
import os

uri = sys.argv[1]
file_name = uri.split('/')[-1]

# Function to parse the URI and determine download method
def parse_uri(uri):
    if uri.startswith('huggingface://'):
        repo_id = uri.split('://')[1]
        return 'huggingface', repo_id.rsplit('/', 1)[0]
    elif 'huggingface.co' in uri:
        parts = uri.split('/resolve/')
        if len(parts) > 1:
            repo_path = parts[0].split('https://huggingface.co/')[-1]
            return 'huggingface', repo_path
    return 'direct', uri

def calculate_sha256(file_path):
    sha256_hash = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for byte_block in iter(lambda: f.read(4096), b''):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()

def manual_safety_check_hf(repo_id):
    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
    scan = scanResponse.json()
    if scan['hasUnsafeFile']:
        return scan
    return None

download_type, repo_id_or_url = parse_uri(uri)

new_checksum = None
file_path = None

# Decide download method based on URI type
if download_type == 'huggingface':
    # Check if the repo is flagged as dangerous by HF
    hazard = manual_safety_check_hf(repo_id_or_url)
    if hazard != None:
        print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', file=sys.stderr)
        sys.exit(5)
    # Use HF API to pull sha
    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
        try:
            new_checksum = file.lfs.sha256
            break
        except Exception as e:
            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
            sys.exit(2)
    if new_checksum is None:
        try:
            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
        except Exception as e:
            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
            sys.exit(2)
else:
    response = requests.get(repo_id_or_url)
    if response.status_code == 200:
        with open(file_name, 'wb') as f:
            f.write(response.content)
        file_path = file_name
    elif response.status_code == 404:
        print(f'File not found: {response.status_code}', file=sys.stderr)
        sys.exit(2)
    else:
        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
        sys.exit(1)

if new_checksum is None:
    new_checksum = calculate_sha256(file_path)
    print(new_checksum)
    os.remove(file_path)
else:
    print(new_checksum)
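The script talks to its caller through stdout and its exit code: on success it prints the SHA256, exit code 5 means Hugging Face flagged the repository as unsafe, exit code 2 means the file or Hub metadata could not be fetched, and exit code 1 covers other download errors. The bash function in the next diff consumes exactly that contract; the small Python wrapper below is only an illustrative sketch of the same contract (the wrapper function and the example URI are hypothetical, not part of the repository).

import subprocess
import sys
from typing import Optional

def fetch_checksum(uri: str) -> Optional[str]:
    """Illustrative sketch: run check_and_update.py for one URI and map its exit code.

    Exit-code meanings taken from the script above:
    5 = unsafe repository, 2 = not found / Hub error, 1 = other download error.
    """
    proc = subprocess.run(
        [sys.executable, ".github/check_and_update.py", uri],
        capture_output=True, text=True,
    )
    if proc.returncode == 0:
        return proc.stdout.strip()  # the freshly computed SHA256
    if proc.returncode == 5:
        print("repository flagged as unsafe; the gallery entry should be dropped")
    elif proc.returncode == 2:
        print("file not found or Hub error; the file entry should be dropped")
    else:
        print("download error; skip this entry for now")
    return None

if __name__ == "__main__":
    # Hypothetical URI in the huggingface:// form that parse_uri() understands.
    print(fetch_checksum("huggingface://example-org/example-repo/model.gguf"))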
.github/checksum_checker.sh (vendored, 79 changed lines)
@@ -14,77 +14,14 @@ function check_and_update_checksum() {
     idx="$5"

     # Download the file and calculate new checksum using Python
-    new_checksum=$(python3 -c "
-    [removed: roughly 70 lines of inline Python previously passed to python3 -c, with the
-     same URI-parsing, download and SHA256 logic that now lives in .github/check_and_update.py,
-     reading the URI from the shell variable instead of argv and without the safety check]
-    ")
+    new_checksum=$(python3 ./.github/check_and_update.py $uri)
+    result=$?
+
+    if [[ $result -eq 5 ]]; then
+        echo "Contaminated entry detected, deleting entry for $model_name..."
+        yq eval -i "del([$idx])" "$input_yaml"
+        return
+    fi

     if [[ "$new_checksum" == "" ]]; then
         echo "Error calculating checksum for $file_name. Skipping..."
@@ -94,7 +31,7 @@ else:
     echo "Checksum for $file_name: $new_checksum"

     # Compare and update the YAML file if checksums do not match
-    result=$?
     if [[ $result -eq 2 ]]; then
         echo "File not found, deleting entry for $file_name..."
         # yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
.github/dependabot.yml (vendored, 112 changed lines)
@@ -1,6 +1,10 @@
 # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
 version: 2
 updates:
+  - package-ecosystem: "gitsubmodule"
+    directory: "/"
+    schedule:
+      interval: "weekly"
   - package-ecosystem: "gomod"
     directory: "/"
     schedule:
@@ -23,3 +27,111 @@ updates:
     schedule:
       # Check for updates to GitHub Actions every weekday
       interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/autogptq"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/bark"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/common/template"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/coqui"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/diffusers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/exllama"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/exllama2"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/mamba"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/openvoice"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/parler-tts"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/petals"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/rerankers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/sentencetransformers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/transformers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/transformers-musicgen"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/vall-e-x"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/vllm"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/chainlit"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/functions"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/langchain/langchainpy-localai-example"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/langchain-chroma"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/streamlit-bot"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    directory: "/examples/k8sgpt"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    directory: "/examples/kubernetes"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    directory: "/examples/langchain"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "gomod"
+    directory: "/examples/semantic-todo"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    directory: "/examples/telegram-bot"
+    schedule:
+      interval: "weekly"
.github/workflows/bump_deps.yaml (vendored, 8 changed lines)
@@ -9,9 +9,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - repository: "go-skynet/go-llama.cpp"
-            variable: "GOLLAMA_VERSION"
-            branch: "master"
           - repository: "ggerganov/llama.cpp"
             variable: "CPPLLAMA_VERSION"
             branch: "master"
@@ -30,9 +27,6 @@ jobs:
           - repository: "go-skynet/bloomz.cpp"
             variable: "BLOOMZ_VERSION"
             branch: "main"
-          - repository: "nomic-ai/gpt4all"
-            variable: "GPT4ALL_VERSION"
-            branch: "main"
           - repository: "mudler/go-ggllm.cpp"
             variable: "GOGGLLM_VERSION"
             branch: "master"
@@ -54,7 +48,7 @@ jobs:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-         title: ':arrow_up: Update ${{ matrix.repository }}'
+         title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
          branch: "update/${{ matrix.variable }}"
          body: Bump of ${{ matrix.repository }} version
          signoff: true
.github/workflows/bump_docs.yaml (vendored, 2 changed lines)
@@ -22,7 +22,7 @@ jobs:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
-         title: ':arrow_up: Update docs version ${{ matrix.repository }}'
+         title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
          branch: "update/docs"
          body: Bump of ${{ matrix.repository }} version inside docs
          signoff: true
.github/workflows/checksum_checker.yaml (vendored, 2 changed lines)
@@ -25,7 +25,7 @@ jobs:
      - name: 'Setup yq'
        uses: dcarbone/install-yq-action@v1.1.1
        with:
-          version: 'v4.43.1'
+          version: 'v4.44.2'
          download-compressed: true
          force: true
.github/workflows/dependabot_auto.yml (vendored, 2 changed lines)
@@ -14,7 +14,7 @@ jobs:
    steps:
    - name: Dependabot metadata
      id: metadata
-      uses: dependabot/fetch-metadata@v2.1.0
+      uses: dependabot/fetch-metadata@v2.2.0
      with:
        github-token: "${{ secrets.GITHUB_TOKEN }}"
        skip-commit-verification: true
.github/workflows/disabled/comment-pr.yaml (vendored, new file, 83 lines)
@@ -0,0 +1,83 @@
name: Comment PRs
on:
  pull_request_target:

jobs:
  comment-pr:
    env:
      MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          ref: "${{ github.event.pull_request.merge_commit_sha }}"
          fetch-depth: 0 # needed to checkout all branches for this Action to work
      - uses: mudler/localai-github-action@v1
        with:
          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
      # Check the PR diff using the current branch and the base branch of the PR
      - uses: GrantBirki/git-diff-action@v2.7.0
        id: git-diff-action
        with:
          json_diff_file_output: diff.json
          raw_diff_file_output: diff.txt
          file_output_only: "true"
          base_branch: ${{ github.event.pull_request.base.sha }}
      - name: Show diff
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        run: |
          cat $DIFF
      - name: Summarize
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        id: summarize
        run: |
          input="$(cat $DIFF)"

          # Define the LocalAI API endpoint
          API_URL="http://localhost:8080/chat/completions"

          # Create a JSON payload using jq to handle special characters
          json_payload=$(jq -n --arg input "$input" '{
            model: "'$MODEL_NAME'",
            messages: [
              {
                role: "system",
                content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
              },
              {
                role: "user",
                content: $input
              }
            ]
          }')

          # Send the request to LocalAI
          response=$(curl -s -X POST $API_URL \
            -H "Content-Type: application/json" \
            -d "$json_payload")

          # Extract the summary from the response
          summary="$(echo $response | jq -r '.choices[0].message.content')"

          # Print the summary
          # -H "Authorization: Bearer $API_KEY" \
          echo "Summary:"
          echo "$summary"
          echo "payload sent"
          echo "$json_payload"
          {
            echo 'message<<EOF'
            echo "$summary"
            echo EOF
          } >> "$GITHUB_OUTPUT"
          docker logs --tail 10 local-ai
      - uses: mshick/add-pr-comment@v2
        if: always()
        with:
          repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
          message: ${{ steps.summarize.outputs.message }}
          message-failure: |
            Uh oh! Could not analyze this PR, maybe it's too big?
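The Summarize step above assembles this request with jq and curl. The Python sketch below makes the same call, only to show the payload shape explicitly: the endpoint, model name, prompts, and the extracted response field are copied from the workflow, while the diff text is a stand-in value.

import requests

# Endpoint and model are the ones the workflow uses; the diff text is a placeholder.
API_URL = "http://localhost:8080/chat/completions"
MODEL_NAME = "hermes-2-theta-llama-3-8b"
diff_text = "<contents of diff.txt>"

payload = {
    "model": MODEL_NAME,
    "messages": [
        {
            "role": "system",
            "content": (
                "You are LocalAI-bot in Github that helps understanding PRs and assess "
                "complexity. Explain what has changed in this PR diff and why"
            ),
        },
        {"role": "user", "content": diff_text},
    ],
}

response = requests.post(API_URL, json=payload, timeout=600)
# The workflow extracts .choices[0].message.content with jq; this reads the same field.
summary = response.json()["choices"][0]["message"]["content"]
print(summary)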
.github/workflows/generate_grpc_cache.yaml (vendored, 4 changed lines)
@@ -75,7 +75,7 @@ jobs:
        uses: actions/checkout@v4

      - name: Cache GRPC
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          builder: ${{ steps.buildx.outputs.name }}
          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -84,7 +84,7 @@ jobs:
          build-args: |
            GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.64.0
+            GRPC_VERSION=v1.65.0
          context: .
          file: ./Dockerfile
          cache-to: type=gha,ignore-error=true
.github/workflows/generate_intel_image.yaml (vendored, 4 changed lines)
@@ -15,7 +15,7 @@ jobs:
    strategy:
      matrix:
        include:
-          - base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
+          - base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
    runs-on: ${{matrix.runs-on}}
@@ -46,7 +46,7 @@ jobs:
        uses: actions/checkout@v4

      - name: Cache Intel images
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
.github/workflows/image-pr.yml (vendored, 183 changed lines)
@@ -35,15 +35,16 @@ jobs:
      max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
      matrix:
        include:
-          - build-type: ''
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=3 --output-sync=target"
+          # This is basically covered by the AIO test
+          # - build-type: ''
+          #   platforms: 'linux/amd64'
+          #   tag-latest: 'false'
+          #   tag-suffix: '-ffmpeg'
+          #   ffmpeg: 'true'
+          #   image-type: 'extras'
+          #   runs-on: 'arc-runner-set'
+          #   base-image: "ubuntu:22.04"
+          #   makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "4"
@@ -55,85 +56,85 @@ jobs:
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas'
-            ffmpeg: 'false'
-            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: 'sycl-f16-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-  core-image-build:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      matrix:
-        include:
-          - build-type: ''
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'sycl_f16'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-            grpc-base-image: "ubuntu:22.04"
-            tag-suffix: 'sycl-f16-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "4"
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'vulkan'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-vulkan-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"
+  [in the new version the 82 lines above are kept in place but commented out, each prefixed with '#']
.github/workflows/image_build.yml (vendored, 12 changed lines)
@@ -215,7 +215,7 @@ jobs:
          password: ${{ secrets.quayPassword }}

      - name: Build and push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        if: github.event_name != 'pull_request'
        with:
          builder: ${{ steps.buildx.outputs.name }}
@@ -232,7 +232,7 @@ jobs:
            BASE_IMAGE=${{ inputs.base-image }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.64.0
+            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
          context: .
          file: ./Dockerfile
@@ -243,7 +243,7 @@ jobs:
          labels: ${{ steps.meta.outputs.labels }}
      ### Start testing image
      - name: Build and push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        if: github.event_name == 'pull_request'
        with:
          builder: ${{ steps.buildx.outputs.name }}
@@ -260,7 +260,7 @@ jobs:
            BASE_IMAGE=${{ inputs.base-image }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.64.0
+            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
          context: .
          file: ./Dockerfile
@@ -276,7 +276,7 @@ jobs:
      ## End testing image
      - name: Build and push AIO image
        if: inputs.aio != ''
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
@@ -291,7 +291,7 @@ jobs:

      - name: Build and push AIO image (dockerhub)
        if: inputs.aio != ''
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
.github/workflows/notify-models.yaml (vendored, 8 changed lines)
@@ -14,11 +14,9 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # needed to checkout all branches for this Action to work
-      - name: Start LocalAI
-        run: |
-          echo "Starting LocalAI..."
-          docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
-          until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
+      - uses: mudler/localai-github-action@v1
+        with:
+          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
      # Check the PR diff using the current branch and the base branch of the PR
      - uses: GrantBirki/git-diff-action@v2.7.0
        id: git-diff-action
.github/workflows/notify-releases.yaml (vendored, 8 changed lines)
@@ -12,11 +12,9 @@ jobs:
      RELEASE_TITLE: ${{ github.event.release.name }}
      RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
    steps:
-      - name: Start LocalAI
-        run: |
-          echo "Starting LocalAI..."
-          docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
-          until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
+      - uses: mudler/localai-github-action@v1
+        with:
+          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
      - name: Summarize
        id: summarize
        run: |
.github/workflows/prlint.yaml (vendored, new file, 28 lines)
@@ -0,0 +1,28 @@
name: Check PR style

on:
  pull_request_target:
    types:
      - opened
      - reopened
      - edited
      - synchronize

jobs:
  title-lint:
    runs-on: ubuntu-latest
    permissions:
      statuses: write
    steps:
      - uses: aslafy-z/conventional-pr-title-action@v3
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  # check-pr-description:
  #   runs-on: ubuntu-latest
  #   steps:
  #   - uses: actions/checkout@v2
  #   - uses: jadrol/pr-description-checker-action@v1.0.0
  #     id: description-checker
  #     with:
  #       repo-token: ${{ secrets.GITHUB_TOKEN }}
  #       exempt-labels: no qa
.github/workflows/release.yaml (vendored, 48 changed lines)
@@ -1,11 +1,15 @@
 name: Build and Release

 on:
-- push
-- pull_request
+  push:
+    branches:
+      - master
+    tags:
+      - 'v*'
+  pull_request:

 env:
-  GRPC_VERSION: v1.64.0
+  GRPC_VERSION: v1.65.0

 permissions:
   contents: write
@@ -32,7 +36,7 @@ jobs:
      run: |
        sudo apt-get update
        sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
-        sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+        sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
    - name: Install CUDA Dependencies
      run: |
        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
@@ -52,7 +56,8 @@ jobs:
      run: |

        git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+          cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
+          cd cmake/build && cmake -DgRPC_INSTALL=ON \
            -DgRPC_BUILD_TESTS=OFF \
            ../.. && sudo make --jobs 5 --output-sync=target
    - name: Install gRPC
@@ -77,16 +82,6 @@ jobs:
        echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
        echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
        GRPC_DIR=$PWD/grpc
-
-        # http://google.github.io/googletest/quickstart-cmake.html
-        # Seems otherwise cross-arch fails to find it
-        echo "include(FetchContent)" >> $GRPC_DIR/CMakeLists.txt
-        echo "FetchContent_Declare(" >> $GRPC_DIR/CMakeLists.txt
-        echo " googletest" >> $GRPC_DIR/CMakeLists.txt
-        echo " URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip" >> $GRPC_DIR/CMakeLists.txt
-        echo ")" >> $GRPC_DIR/CMakeLists.txt
-        echo "FetchContent_MakeAvailable(googletest)" >> $GRPC_DIR/CMakeLists.txt
-
        cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
        GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
        mkdir -p $GRPC_CROSS_BUILD_DIR && \
@@ -106,8 +101,8 @@ jobs:
        CROSS_TOOLCHAIN=/usr/$GNU_HOST
        CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
        CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
-        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
        export PATH=$PATH:$GOPATH/bin
        export PATH=/usr/local/cuda/bin:$PATH
        sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
@@ -156,7 +151,7 @@ jobs:
    - name: Dependencies
      run: |
        sudo apt-get update
-        sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake
+        sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
    - name: Intel Dependencies
      run: |
        wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -170,7 +165,7 @@ jobs:
        sudo apt-get update
        sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
      env:
-        CUDA_VERSION: 12-3
+        CUDA_VERSION: 12-5
    - name: "Install Hipblas"
      env:
        ROCM_VERSION: "6.1"
@@ -206,7 +201,8 @@ jobs:
      if: steps.cache-grpc.outputs.cache-hit != 'true'
      run: |
        git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+          cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
+          cd cmake/build && cmake -DgRPC_INSTALL=ON \
            -DgRPC_BUILD_TESTS=OFF \
            ../.. && sudo make --jobs 5 --output-sync=target
    - name: Install gRPC
@@ -216,8 +212,8 @@ jobs:
    - name: Build
      id: build
      run: |
-        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
        export PATH=$PATH:$GOPATH/bin
        export PATH=/usr/local/cuda/bin:$PATH
        export PATH=/opt/rocm/bin:$PATH
@@ -257,8 +253,8 @@ jobs:
      run: |
        sudo apt-get update
        sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
-        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
    - name: Build stablediffusion
      run: |
        export PATH=$PATH:$GOPATH/bin
@@ -333,8 +329,8 @@ jobs:
    - name: Dependencies
      run: |
        brew install protobuf grpc
-        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
    - name: Build
      id: build
      run: |
.github/workflows/test-extra.yml (vendored, 24 changed lines)
@@ -29,7 +29,7 @@ jobs:
        curl -LsSf https://astral.sh/uv/install.sh | sh
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1

    - name: Test transformers
      run: |
@@ -51,7 +51,7 @@ jobs:
        curl -LsSf https://astral.sh/uv/install.sh | sh
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1

    - name: Test sentencetransformers
      run: |
@@ -74,7 +74,7 @@ jobs:
        curl -LsSf https://astral.sh/uv/install.sh | sh
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1

    - name: Test rerankers
      run: |
@@ -96,7 +96,7 @@ jobs:
        sudo apt-get install -y libopencv-dev
        # Install UV
        curl -LsSf https://astral.sh/uv/install.sh | sh
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1
    - name: Test diffusers
      run: |
        make --jobs=5 --output-sync=target -C backend/python/diffusers
@@ -117,7 +117,7 @@ jobs:
        curl -LsSf https://astral.sh/uv/install.sh | sh
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1

    - name: Test parler-tts
      run: |
@@ -139,7 +139,7 @@ jobs:
        curl -LsSf https://astral.sh/uv/install.sh | sh
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1

    - name: Test openvoice
      run: |
@@ -161,7 +161,7 @@ jobs:
        curl -LsSf https://astral.sh/uv/install.sh | sh
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1

    - name: Test transformers-musicgen
      run: |
@@ -185,7 +185,7 @@ jobs:
    #     curl -LsSf https://astral.sh/uv/install.sh | sh
    #     sudo apt-get install -y ca-certificates cmake curl patch python3-pip
    #     sudo apt-get install -y libopencv-dev
-    #     pip install --user grpcio-tools==1.64.0
+    #     pip install --user --no-cache-dir grpcio-tools==1.64.1

    # - name: Test petals
    #   run: |
@@ -249,7 +249,7 @@ jobs:
    #     curl -LsSf https://astral.sh/uv/install.sh | sh
    #     sudo apt-get install -y ca-certificates cmake curl patch python3-pip
    #     sudo apt-get install -y libopencv-dev
-    #     pip install --user grpcio-tools==1.64.0
+    #     pip install --user --no-cache-dir grpcio-tools==1.64.1

    # - name: Test bark
    #   run: |
@@ -274,7 +274,7 @@ jobs:
    #     curl -LsSf https://astral.sh/uv/install.sh | sh
    #     sudo apt-get install -y ca-certificates cmake curl patch python3-pip
    #     sudo apt-get install -y libopencv-dev
-    #     pip install --user grpcio-tools==1.64.0
+    #     pip install --user --no-cache-dir grpcio-tools==1.64.1
    # - name: Test vllm
    #   run: |
    #     make --jobs=5 --output-sync=target -C backend/python/vllm
@@ -294,7 +294,7 @@ jobs:
        curl -LsSf https://astral.sh/uv/install.sh | sh
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1
    - name: Test vall-e-x
      run: |
        make --jobs=5 --output-sync=target -C backend/python/vall-e-x
@@ -314,7 +314,7 @@ jobs:
        sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
        # Install UV
        curl -LsSf https://astral.sh/uv/install.sh | sh
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1
    - name: Test coqui
      run: |
        make --jobs=5 --output-sync=target -C backend/python/coqui
.github/workflows/test.yml (vendored, 14 changed lines)
@@ -10,7 +10,7 @@ on:
      - '*'

 env:
-  GRPC_VERSION: v1.64.0
+  GRPC_VERSION: v1.65.0

 concurrency:
   group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -71,6 +71,7 @@ jobs:
      run: |
        sudo apt-get update
        sudo apt-get install build-essential curl ffmpeg
+        sudo apt-get install -y libgmock-dev
        curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
        sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
        gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
@@ -93,8 +94,8 @@ jobs:
        sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
        export CUDACXX=/usr/local/cuda/bin/nvcc

-        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
-        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af

        # The python3-grpc-tools package in 22.04 is too old
        pip install --user grpcio-tools
@@ -109,7 +110,7 @@ jobs:
        # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
        PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
      env:
-        CUDA_VERSION: 12-3
+        CUDA_VERSION: 12-4
    - name: Cache grpc
      id: cache-grpc
      uses: actions/cache@v4
@@ -120,7 +121,8 @@ jobs:
      if: steps.cache-grpc.outputs.cache-hit != 'true'
      run: |
        git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+          cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
+          cmake -DgRPC_INSTALL=ON \
            -DgRPC_BUILD_TESTS=OFF \
            ../.. && sudo make --jobs 5
    - name: Install gRPC
@@ -213,7 +215,7 @@ jobs:
    - name: Dependencies
      run: |
        brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
-        pip install --user grpcio-tools==1.64.0
+        pip install --user --no-cache-dir grpcio-tools==1.64.1
    - name: Test
      run: |
        export C_INCLUDE_PATH=/usr/local/include
.github/workflows/update_swagger.yaml (vendored, 8 changed lines)
@@ -13,11 +13,17 @@ jobs:
 - uses: actions/setup-go@v5
 with:
 go-version: 'stable'
+- name: Dependencies
+run: |
+sudo apt-get update
+sudo apt-get install protobuf-compiler
 - run: |
 go install github.com/swaggo/swag/cmd/swag@latest
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 - name: Bump swagger 🔧
 run: |
-make swagger
+make protogen-go swagger
 - name: Create Pull Request
 uses: peter-evans/create-pull-request@v6
 with:

Dockerfile (24 changed lines)
@@ -8,7 +8,7 @@ FROM ${BASE_IMAGE} AS requirements-core

 USER root

-ARG GO_VERSION=1.22.4
+ARG GO_VERSION=1.22.5
 ARG TARGETARCH
 ARG TARGETVARIANT

@@ -146,25 +146,6 @@ RUN <<EOT bash
 fi
 EOT

-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-apt-get update && \
-apt-get install -y --no-install-recommends \
-software-properties-common pciutils && \
-curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
-dpkg -i cuda-keyring_1.1-1_all.deb && \
-rm -f cuda-keyring_1.1-1_all.deb && \
-apt-get update && \
-apt-get install -y --no-install-recommends \
-cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-apt-get clean && \
-rm -rf /var/lib/apt/lists/* \
-; fi

 # If we are building with clblas support, we need the libraries for the builds
 RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
 apt-get update && \
@@ -206,7 +187,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc

 # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
 ARG GRPC_MAKEFLAGS="-j4 -Otarget"
-ARG GRPC_VERSION=v1.64.2
+ARG GRPC_VERSION=v1.65.0

 ENV MAKEFLAGS=${GRPC_MAKEFLAGS}

@@ -227,6 +208,7 @@ RUN apt-get update && \
 RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
 mkdir -p /build/grpc/cmake/build && \
 cd /build/grpc/cmake/build && \
+sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
 cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
 make && \
 make install && \

Makefile (113 changed lines)
@@ -6,8 +6,9 @@ BINARY_NAME=local-ai
 DETECT_LIBS?=true

 # llama.cpp versions
-GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=c4dd11d1d3903e1922c06242e189f6310fc4d8c3
+GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
+GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
+CPPLLAMA_VERSION?=45f2c19cc57286eead7b232ce8028273a817aa4d

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -18,18 +19,23 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
-WHISPER_CPP_VERSION?=1c31f9d4a8936aec550e6c4dc9ca5cae3b4f304a
+WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
+WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3

 # bert.cpp version
+BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
 BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4

 # go-piper version
+PIPER_REPO?=https://github.com/mudler/go-piper
 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759

 # stablediffusion version
+STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
 STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f

 # tinydream version
+TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
 TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057

 export BUILD_TYPE?=
@@ -202,69 +208,109 @@ all: help

 ## BERT embeddings
 sources/go-bert.cpp:
-git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
-cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p sources/go-bert.cpp
+cd sources/go-bert.cpp && \
+git init && \
+git remote add origin $(BERT_REPO) && \
+git fetch origin && \
+git checkout $(BERT_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
 $(MAKE) -C sources/go-bert.cpp libgobert.a

 ## go-llama.cpp
 sources/go-llama.cpp:
-git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
-cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p sources/go-llama.cpp
+cd sources/go-llama.cpp && \
+git init && \
+git remote add origin $(GOLLAMA_REPO) && \
+git fetch origin && \
+git checkout $(GOLLAMA_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
 $(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a

 ## go-piper
 sources/go-piper:
-git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
-cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p sources/go-piper
+cd sources/go-piper && \
+git init && \
+git remote add origin $(PIPER_REPO) && \
+git fetch origin && \
+git checkout $(PIPER_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 sources/go-piper/libpiper_binding.a: sources/go-piper
 $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o

 ## GPT4ALL
 sources/gpt4all:
-git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
-cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p sources/gpt4all
+cd sources/gpt4all && \
+git init && \
+git remote add origin $(GPT4ALL_REPO) && \
+git fetch origin && \
+git checkout $(GPT4ALL_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
 $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a

 ## RWKV
 sources/go-rwkv.cpp:
-git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
-cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p sources/go-rwkv.cpp
+cd sources/go-rwkv.cpp && \
+git init && \
+git remote add origin $(RWKV_REPO) && \
+git fetch origin && \
+git checkout $(RWKV_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
 cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..

 ## stable diffusion
 sources/go-stable-diffusion:
-git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
-cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p sources/go-stable-diffusion
+cd sources/go-stable-diffusion && \
+git init && \
+git remote add origin $(STABLEDIFFUSION_REPO) && \
+git fetch origin && \
+git checkout $(STABLEDIFFUSION_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
 CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a

 ## tiny-dream
 sources/go-tiny-dream:
-git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
-cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p sources/go-tiny-dream
+cd sources/go-tiny-dream && \
+git init && \
+git remote add origin $(TINYDREAM_REPO) && \
+git fetch origin && \
+git checkout $(TINYDREAM_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
 $(MAKE) -C sources/go-tiny-dream libtinydream.a

 ## whisper
 sources/whisper.cpp:
-git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
-cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p sources/whisper.cpp
+cd sources/whisper.cpp && \
+git init && \
+git remote add origin $(WHISPER_REPO) && \
+git fetch origin && \
+git checkout $(WHISPER_CPP_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

-get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
+get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp

 replace:
 $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
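
The checkout recipes in the hunk above all replace a full "git clone --recurse-submodules" with the same sequence: create the directory, git init, add the pinned remote, fetch, check out the pinned commit, then update submodules shallowly. The sketch below is illustrative only and is not part of the diff; the repository URL, commit hash, and destination directory are placeholder assumptions.

# Illustrative sketch (bash), not part of the changeset above.
# REPO, COMMIT and DEST are placeholders, not values taken from this diff.
set -e
REPO="https://github.com/example/example.git"
COMMIT="0000000000000000000000000000000000000000"
DEST="sources/example"
mkdir -p "$DEST"
cd "$DEST"
git init
git remote add origin "$REPO"
git fetch origin
git checkout "$COMMIT"
git submodule update --init --recursive --depth 1 --single-branch

Compared to cloning a default branch, this pattern lets each dependency be pinned to an exact commit while keeping submodule checkouts shallow.
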
@@ -331,6 +377,7 @@ build: prepare backend-assets grpcs ## Build the project
 $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 $(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
+ls -liah backend-assets/grpc
 ifneq ($(BACKEND_LIBS),)
 $(MAKE) backend-assets/lib
 cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -338,7 +385,7 @@ endif
 CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

 build-minimal:
-BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
+BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build

 build-api:
 BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
@@ -375,7 +422,7 @@ else
 endif

 dist-cross-linux-arm64:
-CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
 STATIC=true $(MAKE) build
 mkdir -p release
 # if BUILD_ID is empty, then we don't append it to the binary name
@@ -721,28 +768,28 @@ else
 endif

 # This target is for manually building a variant with-auto detected flags
-backend-assets/grpc/llama-cpp: backend-assets/grpc
+backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-cpp
 $(MAKE) -C backend/cpp/llama-cpp purge
 $(info ${GREEN}I llama-cpp build info:avx2${RESET})
 $(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp

-backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
+backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-avx2
 $(MAKE) -C backend/cpp/llama-avx2 purge
 $(info ${GREEN}I llama-cpp build info:avx2${RESET})
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2

-backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
+backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-avx
 $(MAKE) -C backend/cpp/llama-avx purge
 $(info ${GREEN}I llama-cpp build info:avx${RESET})
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx

-backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
+backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-fallback
 $(MAKE) -C backend/cpp/llama-fallback purge
 $(info ${GREEN}I llama-cpp build info:fallback${RESET})
@@ -753,35 +800,35 @@ ifeq ($(BUILD_TYPE),metal)
 cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
 endif

-backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
+backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-cuda
 $(MAKE) -C backend/cpp/llama-cuda purge
 $(info ${GREEN}I llama-cpp build info:cuda${RESET})
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda

-backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
+backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-hipblas
 $(MAKE) -C backend/cpp/llama-hipblas purge
 $(info ${GREEN}I llama-cpp build info:hipblas${RESET})
 BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas

-backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
+backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
 $(MAKE) -C backend/cpp/llama-sycl_f16 purge
 $(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
 BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16

-backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
+backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
 $(MAKE) -C backend/cpp/llama-sycl_f32 purge
 $(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
 BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32

-backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
+backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-grpc
 $(MAKE) -C backend/cpp/llama-grpc purge
 $(info ${GREEN}I llama-cpp build info:grpc${RESET})
@@ -859,7 +906,7 @@ docker-aio-all:

 docker-image-intel:
 docker build \
---build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
+--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 --build-arg GO_TAGS="none" \
 --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -867,7 +914,7 @@ docker-image-intel:

 docker-image-intel-xpu:
 docker build \
---build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
+--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 --build-arg GO_TAGS="none" \
 --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

README.md (19 changed lines)
@@ -72,14 +72,15 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

 [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

-- 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
-- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
-- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
-- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
-- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
-- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
-- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
-- Reranker API: https://github.com/mudler/LocalAI/pull/2121
+- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
+- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
+- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
+- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
+- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
+- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
+- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
+- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
+- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121

 Hot topics (looking for contributors):

@@ -89,6 +90,7 @@ Hot topics (looking for contributors):
 - Assistant API: https://github.com/mudler/LocalAI/issues/1273
 - Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
 - Vulkan: https://github.com/mudler/LocalAI/issues/1647
+- Anthropic API: https://github.com/mudler/LocalAI/issues/1808

 If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22

@@ -134,6 +136,7 @@ Other:
 - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
 - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
 - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
+- Github Actions: https://github.com/marketplace/actions/start-localai
 - Examples: https://github.com/mudler/LocalAI/tree/master/examples/

@@ -46,8 +46,13 @@ endif
 $(INSTALLED_PACKAGES): grpc_build

 $(GRPC_REPO):
-git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
-cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
+mkdir -p $(GRPC_REPO)/grpc
+cd $(GRPC_REPO)/grpc && \
+git init && \
+git remote add origin $(GIT_REPO_LIB_GRPC) && \
+git fetch origin && \
+git checkout $(TAG_LIB_GRPC) && \
+git submodule update --init --recursive --depth 1 --single-branch

 $(GRPC_BUILD): $(GRPC_REPO)
 mkdir -p $(GRPC_BUILD)

@@ -1,5 +1,6 @@

 LLAMA_VERSION?=
+LLAMA_REPO?=https://github.com/ggerganov/llama.cpp

 CMAKE_ARGS?=
 BUILD_TYPE?=
@@ -45,11 +46,13 @@ ifeq ($(BUILD_TYPE),sycl_f32)
 endif

 llama.cpp:
-git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
-if [ -z "$(LLAMA_VERSION)" ]; then \
-exit 1; \
-fi
-cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
+mkdir -p llama.cpp
+cd llama.cpp && \
+git init && \
+git remote add origin $(LLAMA_REPO) && \
+git fetch origin && \
+git checkout -b build $(LLAMA_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

 llama.cpp/examples/grpc-server: llama.cpp
 mkdir -p llama.cpp/examples/grpc-server

@@ -2108,6 +2108,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 data["grammar"] = predict->grammar();
 data["prompt"] = predict->prompt();
 data["ignore_eos"] = predict->ignoreeos();
+data["embeddings"] = predict->embeddings();

 // for each image in the request, add the image data
 //
@@ -2385,6 +2386,31 @@ public:

 return grpc::Status::OK;
 }

+/// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
+grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
+json data = parse_options(false, request, llama);
+const int task_id = llama.queue_tasks.get_new_id();
+llama.queue_results.add_waiting_task_id(task_id);
+llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
+// get the result
+task_result result = llama.queue_results.recv(task_id);
+//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
+llama.queue_results.remove_waiting_task_id(task_id);
+if (!result.error && result.stop) {
+std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
+// loop the vector and set the embeddings results
+for (int i = 0; i < embeddings.size(); i++) {
+embeddingResult->add_embeddings(embeddings[i]);
+}
+}
+else
+{
+return grpc::Status::OK;
+}
+
+return grpc::Status::OK;
+}
 };

 void RunServer(const std::string& server_address) {

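
The Embedding handler added above routes an embedding request through the llama.cpp server's task queue and copies the resulting vector into the gRPC EmbeddingResult. Assuming a LocalAI instance on its default port with a model configured for embeddings, the feature would typically be exercised through the OpenAI-compatible HTTP API rather than through gRPC directly; the host, port, and model name below are assumptions for illustration, not values from this diff.

# Illustrative sketch (bash), not part of the changeset above.
# Host, port and model name are assumed for the sake of the example.
curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{
    "model": "my-embedding-model",
    "input": "A long time ago in a galaxy far, far away"
  }'
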
@@ -6,9 +6,9 @@ import (
 "fmt"
 "path/filepath"

-pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 "github.com/go-skynet/go-llama.cpp"
 "github.com/mudler/LocalAI/pkg/grpc/base"
+pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )

 type LLM struct {

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
 certifi

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 bark==0.1.5
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 transformers

@@ -1,2 +1,2 @@
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 TTS==0.22.0
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 transformers
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 from concurrent import futures
+import traceback
 import argparse
 from collections import defaultdict
 from enum import Enum
@@ -17,35 +17,39 @@ import backend_pb2_grpc

 import grpc

-from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
+from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
+EulerAncestralDiscreteScheduler
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
-from diffusers.utils import load_image,export_to_video
+from diffusers.utils import load_image, export_to_video
 from compel import Compel, ReturnedEmbeddingsType

 from transformers import CLIPTextModel
 from safetensors.torch import load_file


 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
-COMPEL=os.environ.get("COMPEL", "0") == "1"
-XPU=os.environ.get("XPU", "0") == "1"
-CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
-SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
-CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
-FPS=os.environ.get("FPS", "7")
-DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
-FRAMES=os.environ.get("FRAMES", "64")
+COMPEL = os.environ.get("COMPEL", "0") == "1"
+XPU = os.environ.get("XPU", "0") == "1"
+CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
+SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
+CHUNK_SIZE = os.environ.get("CHUNK_SIZE", "8")
+FPS = os.environ.get("FPS", "7")
+DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
+FRAMES = os.environ.get("FRAMES", "64")

 if XPU:
 import intel_extension_for_pytorch as ipex

 print(ipex.xpu.get_device_name(0))

 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))


 # https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287
-def sc(self, clip_input, images) : return images, [False for i in images]
+def sc(self, clip_input, images): return images, [False for i in images]


 # edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values
 safety_checker.StableDiffusionSafetyChecker.forward = sc

@@ -62,6 +66,8 @@ from diffusers.schedulers import (
 PNDMScheduler,
 UniPCMultistepScheduler,
 )


 # The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
 # Credits to https://github.com/neggles
 # See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
@@ -136,10 +142,12 @@ def get_scheduler(name: str, config: dict = {}):

 return sched_class.from_config(config)


 # Implement the BackendServicer class with the service methods
 class BackendServicer(backend_pb2_grpc.BackendServicer):
 def Health(self, request, context):
 return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

 def LoadModel(self, request, context):
 try:
 print(f"Loading model {request.Model}...", file=sys.stderr)
@@ -149,7 +157,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

 if request.F16Memory:
 torchType = torch.float16
-variant="fp16"
+variant = "fp16"

 local = False
 modelFile = request.Model
@@ -172,8 +180,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 modelFile = request.ModelFile

 fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
-self.img2vid=False
-self.txt2vid=False
+self.img2vid = False
+self.txt2vid = False
 ## img2img
 if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
 if fromSingleFile:
@@ -188,7 +196,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 torch_dtype=torchType)
 ## img2vid
 elif request.PipelineType == "StableVideoDiffusionPipeline":
-self.img2vid=True
+self.img2vid = True
 self.pipe = StableVideoDiffusionPipeline.from_pretrained(
 request.Model, torch_dtype=torchType, variant=variant
 )
@@ -211,7 +219,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 self.pipe = DiffusionPipeline.from_pretrained(request.Model,
 torch_dtype=torchType)
 elif request.PipelineType == "VideoDiffusionPipeline":
-self.txt2vid=True
+self.txt2vid = True
 self.pipe = DiffusionPipeline.from_pretrained(request.Model,
 torch_dtype=torchType)
 elif request.PipelineType == "StableDiffusionXLPipeline":
@@ -249,13 +257,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

 if COMPEL:
 self.compel = Compel(
-tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
+tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
 text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
 returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
 requires_pooled=[False, True]
 )


 if request.ControlNet:
 self.controlnet = ControlNetModel.from_pretrained(
 request.ControlNet, torch_dtype=torchType, variant=variant
@@ -263,13 +270,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 self.pipe.controlnet = self.controlnet
 else:
 self.controlnet = None

-if request.CUDA:
-self.pipe.to('cuda')
-if self.controlnet:
-self.controlnet.to('cuda')
-if XPU:
-self.pipe = self.pipe.to("xpu")
 # Assume directory from request.ModelFile.
 # Only if request.LoraAdapter it's not an absolute path
 if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
@@ -282,10 +282,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 if request.LoraAdapter:
 # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
 if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
-self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
+# self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
+self.pipe.load_lora_weights(request.LoraAdapter)
 else:
 self.pipe.unet.load_attn_procs(request.LoraAdapter)

+if request.CUDA:
+self.pipe.to('cuda')
+if self.controlnet:
+self.controlnet.to('cuda')
+if XPU:
+self.pipe = self.pipe.to("xpu")
 except Exception as err:
 return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
 # Implement your logic here for the LoadModel service
@@ -372,7 +379,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 options["image"] = pose_image

 if CLIPSKIP and self.clip_skip != 0:
-options["clip_skip"]=self.clip_skip
+options["clip_skip"] = self.clip_skip

 # Get the keys that we will build the args for our pipe for
 keys = options.keys()
@@ -430,6 +437,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

 return backend_pb2.Result(message="Media generated", success=True)


 def serve(address):
 server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
 backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
@@ -453,6 +461,7 @@ def serve(address):
 except KeyboardInterrupt:
 server.stop(0)


 if __name__ == "__main__":
 parser = argparse.ArgumentParser(description="Run the gRPC server.")
 parser.add_argument(

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchvision
|
torchvision
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,7 +1,9 @@
|
|||||||
|
setuptools
|
||||||
accelerate
|
accelerate
|
||||||
compel
|
compel
|
||||||
|
peft
|
||||||
diffusers
|
diffusers
|
||||||
grpcio==1.64.0
|
grpcio==1.65.0
|
||||||
opencv-python
|
opencv-python
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.64.0
|
grpcio==1.65.0
|
||||||
protobuf
|
protobuf
|
||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
accelerate
|
accelerate
|
||||||
grpcio==1.64.0
|
grpcio==1.65.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
torch
|
torch
|
||||||
|
|||||||
@@ -4,4 +4,4 @@
|
|||||||
packaging
|
packaging
|
||||||
setuptools
|
setuptools
|
||||||
wheel
|
wheel
|
||||||
torch==2.2.0
|
torch==2.3.1
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
causal-conv1d==1.2.0.post2
|
causal-conv1d==1.4.0
|
||||||
mamba-ssm==1.2.0.post1
|
mamba-ssm==2.2.2
|
||||||
grpcio==1.64.0
|
grpcio==1.65.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
@@ -2,22 +2,22 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.64.0
+grpcio==1.64.1
 protobuf
 librosa==0.9.1
-faster-whisper==0.9.0
+faster-whisper==1.0.3
 pydub==0.25.1
 wavmark==0.0.3
-numpy==1.22.0
+numpy==1.26.4
 eng_to_ipa==0.0.2
 inflect==7.0.0
 unidecode==1.3.7
-whisper-timestamped==1.14.2
+whisper-timestamped==1.15.4
 openai
 python-dotenv
 pypinyin==0.50.0
 cn2an==0.5.22
 jieba==0.42.1
-gradio==3.48.0
+gradio==4.38.1
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git

@@ -1,20 +1,20 @@
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
-librosa==0.9.1
+librosa
-faster-whisper==0.9.0
+faster-whisper
 pydub==0.25.1
 wavmark==0.0.3
-numpy==1.22.0
+numpy
 eng_to_ipa==0.0.2
-inflect==7.0.0
+inflect
-unidecode==1.3.7
+unidecode
-whisper-timestamped==1.14.2
+whisper-timestamped
 openai
 python-dotenv
-pypinyin==0.50.0
+pypinyin
 cn2an==0.5.22
 jieba==0.42.1
-gradio==3.48.0
+gradio
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
 git+https://github.com/myshell-ai/OpenVoice.git

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 transformers

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
-sentence-transformers==2.5.1
+sentence-transformers==3.0.1
 transformers
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,7 +1,7 @@
 accelerate
 transformers
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
-scipy==1.13.0
+scipy==1.14.0
 certifi

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,9 +1,9 @@
 accelerate
 transformers
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
 certifi
 intel-extension-for-transformers
 bitsandbytes
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,4 +1,4 @@
 accelerate
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 vllm
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 transformers
@@ -28,7 +28,6 @@ type Application struct {
 	// LocalAI System Services
 	BackendMonitorService *services.BackendMonitorService
 	GalleryService        *services.GalleryService
-	ListModelsService     *services.ListModelsService
 	LocalAIMetricsService *services.LocalAIMetricsService
 	// OpenAIService *services.OpenAIService
 }

@@ -57,7 +57,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 	if _, err := os.Stat(modelFile); os.IsNotExist(err) {
 		utils.ResetDownloadTimers()
 		// if we failed to load the model, we try to download it
-		err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
+		err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans)
 		if err != nil {
 			return nil, err
 		}

@@ -91,7 +91,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	Type:          c.ModelType,
 	RopeFreqScale: c.RopeFreqScale,
 	NUMA:          c.NUMA,
-	Embeddings:    c.Embeddings,
+	Embeddings:    *c.Embeddings,
 	LowVRAM:       *c.LowVRAM,
 	NGPULayers:    int32(*c.NGPULayers),
 	MMap:          *c.MMap,
@@ -2,129 +2,20 @@ package cli

 import (
 	"context"
-	"errors"
-	"fmt"
-	"io"
-	"net"
-	"time"
-
-	"math/rand/v2"
-
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/p2p"
-	"github.com/mudler/edgevpn/pkg/node"
-	"github.com/mudler/edgevpn/pkg/protocol"
-	"github.com/mudler/edgevpn/pkg/types"
-	"github.com/rs/zerolog/log"
 )

 type FederatedCLI struct {
 	Address        string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
 	Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
+	LoadBalanced   bool   `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
 }

 func (f *FederatedCLI) Run(ctx *cliContext.Context) error {

-	n, err := p2p.NewNode(f.Peer2PeerToken)
-	if err != nil {
-		return fmt.Errorf("creating a new node: %w", err)
-	}
-	err = n.Start(context.Background())
-	if err != nil {
-		return fmt.Errorf("creating a new node: %w", err)
-	}
-
-	if err := p2p.ServiceDiscoverer(context.Background(), n, f.Peer2PeerToken, p2p.FederatedID, nil); err != nil {
-		return err
-	}
-
-	return Proxy(context.Background(), n, f.Address, p2p.FederatedID)
-}
-
-func Proxy(ctx context.Context, node *node.Node, listenAddr, service string) error {
-
-	log.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
-	// Open local port for listening
-	l, err := net.Listen("tcp", listenAddr)
-	if err != nil {
-		log.Error().Err(err).Msg("Error listening")
-		return err
-	}
-	// ll.Info("Binding local port on", srcaddr)
-
-	ledger, _ := node.Ledger()
-
-	// Announce ourselves so nodes accepts our connection
-	ledger.Announce(
-		ctx,
-		10*time.Second,
-		func() {
-			// Retrieve current ID for ip in the blockchain
-			//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
-			// If mismatch, update the blockchain
-			//if !found {
-			updatedMap := map[string]interface{}{}
-			updatedMap[node.Host().ID().String()] = &types.User{
-				PeerID:    node.Host().ID().String(),
-				Timestamp: time.Now().String(),
-			}
-			ledger.Add(protocol.UsersLedgerKey, updatedMap)
-			// }
-		},
-	)
-
-	defer l.Close()
-	for {
-		select {
-		case <-ctx.Done():
-			return errors.New("context canceled")
-		default:
-			log.Debug().Msg("New for connection")
-			// Listen for an incoming connection.
-			conn, err := l.Accept()
-			if err != nil {
-				fmt.Println("Error accepting: ", err.Error())
-				continue
-			}
-
-			// Handle connections in a new goroutine, forwarding to the p2p service
-			go func() {
-				var tunnelAddresses []string
-				for _, v := range p2p.GetAvailableNodes(p2p.FederatedID) {
-					if v.IsOnline() {
-						tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
-					} else {
-						log.Info().Msgf("Node %s is offline", v.ID)
-					}
-				}
-
-				// open a TCP stream to one of the tunnels
-				// chosen randomly
-				// TODO: optimize this and track usage
-				tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
-
-				tunnelConn, err := net.Dial("tcp", tunnelAddr)
-				if err != nil {
-					log.Error().Err(err).Msg("Error connecting to tunnel")
-					return
-				}
-
-				log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
-				closer := make(chan struct{}, 2)
-				go copyStream(closer, tunnelConn, conn)
-				go copyStream(closer, conn, tunnelConn)
-				<-closer
-
-				tunnelConn.Close()
-				conn.Close()
-				// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
-			}()
-		}
-	}
-
-}
-
-func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
-	defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
-	io.Copy(dst, src)
+	fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)
+
+	return fs.Start(context.Background())
 }
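The federate command now delegates node discovery and proxying to the p2p package instead of carrying its own listener loop. A minimal sketch of how the new entry point is driven, assuming only the two calls visible in the diff (the main scaffolding, the token value, and the error handling are illustrative, not taken from the repository):

package main

import (
	"context"
	"log"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// These values mirror the FederatedCLI flags above; in the CLI they come from
	// LOCALAI_ADDRESS, LOCALAI_P2P_TOKEN and LOCALAI_LOAD_BALANCED.
	token := "example-token" // hypothetical value
	fs := p2p.NewFederatedServer(":8080", p2p.FederatedID, token, true)

	// Start blocks, serving the federated proxy until the context is cancelled.
	if err := fs.Start(context.Background()); err != nil {
		log.Fatal(err)
	}
}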
@@ -2,6 +2,7 @@ package cli

 import (
 	"encoding/json"
+	"errors"
 	"fmt"

 	cliContext "github.com/mudler/LocalAI/core/cli/context"

@@ -24,6 +25,7 @@ type ModelsList struct {
 }

 type ModelsInstall struct {
+	DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
 	ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`

 	ModelsCMDFlags `embed:""`

@@ -88,9 +90,15 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 			return err
 		}

+		err = gallery.SafetyScanGalleryModel(model)
+		if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
+			return err
+		}
+
 		log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
 	}
-	err = startup.InstallModels(galleries, "", mi.ModelsPath, progressCallback, modelName)
+	err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
 	if err != nil {
 		return err
 	}
@@ -50,7 +50,8 @@ type RunCMD struct {
 	UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
 	APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
 	DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
-	OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
+	DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
+	OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
 	Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
 	Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
 	ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`

@@ -92,6 +93,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithApiKeys(r.APIKeys),
 		config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
 		config.WithOpaqueErrors(r.OpaqueErrors),
+		config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
 	}

 	token := ""

@@ -117,7 +119,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		}

 		log.Info().Msg("Starting P2P server discovery...")
-		if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
+		if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) {
 			var tunnelAddresses []string
 			for _, v := range p2p.GetAvailableNodes("") {
 				if v.IsOnline() {
@@ -1,16 +1,22 @@
 package cli

 import (
+	"encoding/json"
+	"errors"
 	"fmt"

 	"github.com/rs/zerolog/log"

 	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/pkg/downloader"
 	gguf "github.com/thxcode/gguf-parser-go"
 )

 type UtilCMD struct {
 	GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
+	HFScan   HFScanCMD   `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
 }

 type GGUFInfoCMD struct {

@@ -18,6 +24,12 @@ type GGUFInfoCMD struct {
 	Header bool `optional:"" default:"false" name:"header" help:"Show header information"`
 }

+type HFScanCMD struct {
+	ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
+	Galleries  string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
+	ToScan     []string `arg:""`
+}
+
 func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
 	if u.Args == nil || len(u.Args) == 0 {
 		return fmt.Errorf("no GGUF file provided")

@@ -53,3 +65,37 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {

 	return nil
 }
+
+func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
+	log.Info().Msg("LocalAI Security Scanner - This is BEST EFFORT functionality! Currently limited to huggingface models!")
+	if len(hfscmd.ToScan) == 0 {
+		log.Info().Msg("Checking all installed models against galleries")
+		var galleries []config.Gallery
+		if err := json.Unmarshal([]byte(hfscmd.Galleries), &galleries); err != nil {
+			log.Error().Err(err).Msg("unable to load galleries")
+		}
+
+		err := gallery.SafetyScanGalleryModels(galleries, hfscmd.ModelsPath)
+		if err == nil {
+			log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
+		} else {
+			log.Error().Err(err).Msg("! WARNING ! A known-vulnerable model is installed!")
+		}
+		return err
+	} else {
+		var errs error = nil
+		for _, uri := range hfscmd.ToScan {
+			log.Info().Str("uri", uri).Msg("scanning specific uri")
+			scanResults, err := downloader.HuggingFaceScan(uri)
+			if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
+				log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!")
+				errs = errors.Join(errs, err)
+			}
+		}
+		if errs != nil {
+			return errs
+		}
+		log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
+		return nil
+	}
+}
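The new hf-scan subcommand is a thin wrapper around downloader.HuggingFaceScan, which can also be called directly. A minimal sketch, assuming only the call shape visible in the diff (the URI is a placeholder and the error handling is illustrative):

package main

import (
	"errors"
	"fmt"

	"github.com/mudler/LocalAI/pkg/downloader"
)

func main() {
	uri := "https://huggingface.co/some-org/some-model" // hypothetical URI
	scanResults, err := downloader.HuggingFaceScan(uri)
	if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
		// The scan reports ClamAV-flagged files and dangerous pickle files.
		fmt.Println("unsafe files:", scanResults.ClamAVInfectedFiles, scanResults.DangerousPickles)
		return
	}
	fmt.Println("no known issues detected (best effort)")
}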
@@ -31,6 +31,7 @@ type ApplicationConfig struct {
 	PreloadModelsFromPath string
 	CORSAllowOrigins      string
 	ApiKeys               []string
+	EnforcePredownloadScans bool
 	OpaqueErrors          bool
 	P2PToken              string

@@ -301,6 +302,12 @@ func WithApiKeys(apiKeys []string) AppOption {
 	}
 }

+func WithEnforcedPredownloadScans(enforced bool) AppOption {
+	return func(o *ApplicationConfig) {
+		o.EnforcePredownloadScans = enforced
+	}
+}
+
 func WithOpaqueErrors(opaque bool) AppOption {
 	return func(o *ApplicationConfig) {
 		o.OpaqueErrors = opaque
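Options such as the new WithEnforcedPredownloadScans are plain `func(*ApplicationConfig)` closures, so they compose like any functional option. A minimal sketch of how they apply to a config; the explicit loop is illustrative of the pattern, not necessarily how LocalAI wires its constructor internally:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/config"
)

func main() {
	opts := []config.AppOption{
		// Mirrors LOCALAI_DISABLE_PREDOWNLOAD_SCAN=false on the CLI side.
		config.WithEnforcedPredownloadScans(true),
		config.WithOpaqueErrors(false),
	}

	// Apply each option to a fresh ApplicationConfig, as a functional-options
	// constructor would do.
	cfg := &config.ApplicationConfig{}
	for _, o := range opts {
		o(cfg)
	}
	fmt.Println("enforce pre-download scans:", cfg.EnforcePredownloadScans)
}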
@@ -32,7 +32,7 @@ type BackendConfig struct {
 	Threads *int              `yaml:"threads"`
 	Debug   *bool             `yaml:"debug"`
 	Roles   map[string]string `yaml:"roles"`
-	Embeddings bool  `yaml:"embeddings"`
+	Embeddings *bool `yaml:"embeddings"`
 	Backend string `yaml:"backend"`
 	TemplateConfig TemplateConfig `yaml:"template"`

@@ -338,6 +338,10 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.LowVRAM = &falseV
 	}

+	if cfg.Embeddings == nil {
+		cfg.Embeddings = &falseV
+	}
+
 	// Value passed by the top level are treated as default (no implicit defaults)
 	// defaults are set by the user
 	if ctx == 0 {
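Switching Embeddings from bool to *bool lets SetDefaults tell "key absent from the YAML" (nil, so apply the default) apart from an explicit false, which is why callers such as gRPCModelOpts now dereference it. A self-contained sketch of the same pattern, using only the yaml library already imported elsewhere in the diff:

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

type backendConfig struct {
	Embeddings *bool `yaml:"embeddings"`
}

func (c *backendConfig) setDefaults() {
	falseV := false
	if c.Embeddings == nil { // only default when the key was never set
		c.Embeddings = &falseV
	}
}

func main() {
	var a, b backendConfig
	yaml.Unmarshal([]byte("embeddings: true"), &a) // explicit value is preserved
	yaml.Unmarshal([]byte("{}"), &b)               // absent key receives the default
	a.setDefaults()
	b.setDefaults()
	fmt.Println(*a.Embeddings, *b.Embeddings) // true false
}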
@@ -20,6 +20,7 @@ const (
 	ChatML
 	Mistral03
 	Gemma
+	DeepSeek2
 )

 type settingsConfig struct {

@@ -37,6 +38,17 @@ var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConf
 			Completion: "{{.Input}}",
 		},
 	},
+	DeepSeek2: {
+		StopWords: []string{"<|end▁of▁sentence|>"},
+		TemplateConfig: TemplateConfig{
+			ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
+{{ end -}}
+{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<|end▁of▁sentence|>{{end}}
+{{if eq .RoleName "system" -}}{{.Content}}
+{{end -}}`,
+			Chat: "{{.Input -}}\nAssistant: ",
+		},
+	},
 	LLaMa3: {
 		StopWords: []string{"<|eot_id|>"},
 		TemplateConfig: TemplateConfig{

@@ -208,8 +220,11 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
 	qwen2 := arch == "qwen2"
 	phi3 := arch == "phi-3"
 	gemma := strings.HasPrefix(f.Model().Name, "gemma")
+	deepseek2 := arch == "deepseek2"

 	switch {
+	case deepseek2:
+		return DeepSeek2
 	case gemma:
 		return Gemma
 	case llama3:
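For reference, with those defaults a short system/user/assistant exchange would be serialized roughly as follows before being sent to a DeepSeek2-family model (approximate rendering; exact blank-line handling depends on Go template whitespace trimming, and <|end▁of▁sentence|> doubles as the stop word):

You are a helpful assistant.
User: Hello
Assistant: Hi there.<|end▁of▁sentence|>User: What can you do?
Assistant: 

The trailing "Assistant: " produced by the Chat template is what cues the model to generate the next reply.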
@@ -7,15 +7,16 @@ import (
 	"path/filepath"
 	"strings"

-	"github.com/imdario/mergo"
+	"dario.cat/mergo"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )

 // Installs a model from the gallery
-func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {
+func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error {

 	applyModel := func(model *GalleryModel) error {
 		name = strings.ReplaceAll(name, string(os.PathSeparator), "__")

@@ -63,7 +64,7 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
 			return err
 		}

-		if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus); err != nil {
+		if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan); err != nil {
 			return err
 		}

@@ -189,6 +190,12 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin

 	galleryFile := filepath.Join(basePath, galleryFileName(name))

+	for _, f := range []string{configFile, galleryFile} {
+		if err := utils.VerifyPath(f, basePath); err != nil {
+			return fmt.Errorf("failed to verify path %s: %w", f, err)
+		}
+	}
+
 	var err error
 	// Delete all the files associated to the model
 	// read the model config

@@ -228,3 +235,29 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin

 	return err
 }
+
+// This is ***NEVER*** going to be perfect or finished.
+// This is a BEST EFFORT function to surface known-vulnerable models to users.
+func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error {
+	galleryModels, err := AvailableGalleryModels(galleries, basePath)
+	if err != nil {
+		return err
+	}
+	for _, gM := range galleryModels {
+		if gM.Installed {
+			err = errors.Join(err, SafetyScanGalleryModel(gM))
+		}
+	}
+	return err
+}
+
+func SafetyScanGalleryModel(galleryModel *GalleryModel) error {
+	for _, file := range galleryModel.AdditionalFiles {
+		scanResults, err := downloader.HuggingFaceScan(file.URI)
+		if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
+			log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
+			return err
+		}
+	}
+	return nil
+}
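SafetyScanGalleryModels is what both the hf-scan CLI command and the install path lean on. A minimal caller sketch based on the signature above; the gallery entry and models path are hypothetical placeholders:

package main

import (
	"log"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/gallery"
)

func main() {
	// Hypothetical gallery entry and models directory; in LocalAI these come from
	// the LOCALAI_GALLERIES and LOCALAI_MODELS_PATH settings.
	galleries := []config.Gallery{{Name: "localai", URL: "https://example.com/index.yaml"}}
	if err := gallery.SafetyScanGalleryModels(galleries, "/models"); err != nil {
		log.Printf("best-effort scan flagged installed models: %v", err)
	}
}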
@@ -1,11 +1,12 @@
 package gallery

 import (
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"

-	"github.com/imdario/mergo"
+	"dario.cat/mergo"
 	lconfig "github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/utils"

@@ -94,7 +95,7 @@ func ReadConfigFile(filePath string) (*Config, error) {
 	return &config, nil
 }

-func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error {
+func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) error {
 	// Create base path if it doesn't exist
 	err := os.MkdirAll(basePath, 0750)
 	if err != nil {

@@ -112,9 +113,18 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
 		if err := utils.VerifyPath(file.Filename, basePath); err != nil {
 			return err
 		}

 		// Create file path
 		filePath := filepath.Join(basePath, file.Filename)
+
+		if enforceScan {
+			scanResults, err := downloader.HuggingFaceScan(file.URI)
+			if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
+				log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
+				return err
+			}
+		}
+
 		if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
 			return err
 		}
@@ -21,7 +21,7 @@ var _ = Describe("Model test", func() {
 			defer os.RemoveAll(tempdir)
 			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
 			Expect(err).ToNot(HaveOccurred())
-			err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {})
+			err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
 			Expect(err).ToNot(HaveOccurred())

 			for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "cerebras.yaml"} {

@@ -69,7 +69,7 @@ var _ = Describe("Model test", func() {
 			Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
 			Expect(models[0].Installed).To(BeFalse())

-			err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {})
+			err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
 			Expect(err).ToNot(HaveOccurred())

 			dat, err := os.ReadFile(filepath.Join(tempdir, "bert.yaml"))

@@ -106,7 +106,7 @@ var _ = Describe("Model test", func() {
 			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
 			Expect(err).ToNot(HaveOccurred())

-			err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {})
+			err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
 			Expect(err).ToNot(HaveOccurred())

 			for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {

@@ -122,7 +122,7 @@ var _ = Describe("Model test", func() {
 			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
 			Expect(err).ToNot(HaveOccurred())

-			err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {})
+			err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true)
 			Expect(err).ToNot(HaveOccurred())

 			for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {

@@ -148,7 +148,7 @@ var _ = Describe("Model test", func() {
 			c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
 			Expect(err).ToNot(HaveOccurred())

-			err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {})
+			err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
 			Expect(err).To(HaveOccurred())
 		})
 	})
@@ -5,6 +5,8 @@ import (
 	"strings"

 	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )

@@ -13,7 +15,7 @@ import (
 // If no model is specified, it will take the first available
 // Takes a model string as input which should be the one received from the user request.
 // It returns the model name resolved from the context and an error if any.
-func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
+func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
 	if ctx.Params("model") != "" {
 		modelInput = ctx.Params("model")
 	}

@@ -24,7 +26,7 @@ func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput stri

 	// If no model was specified, take the first available
 	if modelInput == "" && !bearerExists && firstModel {
-		models, _ := loader.ListModels()
+		models, _ := services.ListModels(cl, loader, "", true)
 		if len(models) > 0 {
 			modelInput = models[0]
 			log.Debug().Msgf("No model specified, using: %s", modelInput)
@@ -28,7 +28,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 			return err
 		}

-		modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false)
+		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
 		if err != nil {
 			modelFile = input.ModelID
 			log.Warn().Msgf("Model not found in context: %s", input.ModelID)
@@ -12,6 +12,11 @@ import (
 	"github.com/rs/zerolog/log"
 )

+// JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/)
+// @Summary Reranks a list of phrases by relevance to a given text query.
+// @Param request body schema.JINARerankRequest true "query params"
+// @Success 200 {object} schema.JINARerankResponse "Response"
+// @Router /v1/rerank [post]
 func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		req := new(schema.JINARerankRequest)

@@ -28,7 +33,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 			return err
 		}

-		modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
+		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
 		if err != nil {
 			modelFile = input.Model
 			log.Warn().Msgf("Model not found in context: %s", input.Model)
@@ -6,6 +6,11 @@ import (
 	"github.com/mudler/LocalAI/core/services"
 )

+// BackendMonitorEndpoint returns the status of the specified backend
+// @Summary Backend monitor endpoint
+// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
+// @Success 200 {object} proto.StatusResponse "Response"
+// @Router /backend/monitor [get]
 func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {

@@ -23,6 +28,10 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
 		}
 	}

+// BackendMonitorEndpoint shuts down the specified backend
+// @Summary Backend monitor endpoint
+// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
+// @Router /backend/shutdown [post]
 func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(schema.BackendMonitorRequest)
@@ -9,6 +9,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/rs/zerolog/log"
 )

@@ -33,6 +34,10 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath str
 	}
 }

+// GetOpStatusEndpoint returns the job status
+// @Summary Returns the job status
+// @Success 200 {object} gallery.GalleryOpStatus "Response"
+// @Router /models/jobs/{uuid} [get]
 func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		status := mgs.galleryApplier.GetStatus(c.Params("uuid"))

@@ -43,12 +48,21 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx)
 		}
 	}

+// GetAllStatusEndpoint returns all the jobs status progress
+// @Summary Returns all the jobs status progress
+// @Success 200 {object} map[string]gallery.GalleryOpStatus "Response"
+// @Router /models/jobs [get]
 func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		return c.JSON(mgs.galleryApplier.GetAllStatus())
 	}
 }

+// ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery
+// @Summary Install models to LocalAI.
+// @Param request body GalleryModel true "query params"
+// @Success 200 {object} schema.GalleryResponse "Response"
+// @Router /models/apply [post]
 func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(GalleryModel)

@@ -68,13 +82,15 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
 			Galleries: mgs.galleries,
 			ConfigURL: input.ConfigURL,
 		}
-		return c.JSON(struct {
-			ID        string `json:"uuid"`
-			StatusURL string `json:"status"`
-		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
 	}
 }

+// DeleteModelGalleryEndpoint lets delete models from a LocalAI instance
+// @Summary delete models to LocalAI.
+// @Param name path string true "Model name"
+// @Success 200 {object} schema.GalleryResponse "Response"
+// @Router /models/delete/{name} [post]
 func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		modelName := c.Params("name")

@@ -89,13 +105,14 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
 			return err
 		}

-		return c.JSON(struct {
-			ID        string `json:"uuid"`
-			StatusURL string `json:"status"`
-		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
 	}
 }

+// ListModelFromGalleryEndpoint list the available models for installation from the active galleries
+// @Summary List installable models.
+// @Success 200 {object} []gallery.GalleryModel "Response"
+// @Router /models/available [get]
 func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)

@@ -116,6 +133,10 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *f
 	}
 }

+// ListModelGalleriesEndpoint list the available galleries configured in LocalAI
+// @Summary List all Galleries
+// @Success 200 {object} []config.Gallery "Response"
+// @Router /models/galleries [get]
 // NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
 func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {

@@ -128,6 +149,11 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib
 	}
 }

+// AddModelGalleryEndpoint adds a gallery in LocalAI
+// @Summary Adds a gallery in LocalAI
+// @Param request body config.Gallery true "Gallery details"
+// @Success 200 {object} []config.Gallery "Response"
+// @Router /models/galleries [post]
 func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(config.Gallery)

@@ -150,6 +176,11 @@ func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.
 	}
 }

+// RemoveModelGalleryEndpoint remove a gallery in LocalAI
+// @Summary removes a gallery from LocalAI
+// @Param request body config.Gallery true "Gallery details"
+// @Success 200 {object} []config.Gallery "Response"
+// @Router /models/galleries [delete]
 func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(config.Gallery)

@@ -165,6 +196,10 @@ func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fib
 		mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
 			return gallery.Name == input.Name
 		})
-		return c.Send(nil)
+		dat, err := json.Marshal(mgs.galleries)
+		if err != nil {
+			return err
+		}
+		return c.Send(dat)
 	}
 }
@@ -9,8 +9,11 @@ import (
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )

+// LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI
+// @Summary Prometheus metrics endpoint
+// @Param request body config.Gallery true "Gallery details"
+// @Router /metrics [get]
 func LocalAIMetricsEndpoint() fiber.Handler {
-
 	return adaptor.HTTPHandler(promhttp.Handler())
 }
core/http/endpoints/localai/p2p.go (new file, 28 lines)
@@ -0,0 +1,28 @@
+package localai
+
+import (
+	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/schema"
+)
+
+// ShowP2PNodes returns the P2P Nodes
+// @Summary Returns available P2P nodes
+// @Success 200 {object} []schema.P2PNodesResponse "Response"
+// @Router /api/p2p [get]
+func ShowP2PNodes(c *fiber.Ctx) error {
+	// Render index
+	return c.JSON(schema.P2PNodesResponse{
+		Nodes:          p2p.GetAvailableNodes(""),
+		FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID),
+	})
+}
+
+// ShowP2PToken returns the P2P token
+// @Summary Show the P2P token
+// @Success 200 {string} string "Response"
+// @Router /api/p2p/token [get]
+func ShowP2PToken(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
+	return func(c *fiber.Ctx) error { return c.Send([]byte(appConfig.P2PToken)) }
+}
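The two new handlers expose P2P state over HTTP on /api/p2p and /api/p2p/token. A minimal client-side sketch; the base URL is an assumption, and the JSON is decoded generically because the exact field names depend on the schema.P2PNodesResponse struct tags, which are not shown in this diff:

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:8080/api/p2p") // assumes a local LocalAI instance
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	var nodes map[string]any // generic decode; field names follow the schema struct tags
	if err := json.Unmarshal(body, &nodes); err != nil {
		panic(err)
	}
	fmt.Println(nodes)
}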
@@ -29,7 +29,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 			return err
 		}

-		modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
+		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
 		if err != nil {
 			modelFile = input.Model
 			log.Warn().Msgf("Model not found in context: %s", input.Model)
@@ -5,6 +5,7 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/mudler/LocalAI/pkg/model"
 )

@@ -12,7 +13,7 @@ import (
 func WelcomeEndpoint(appConfig *config.ApplicationConfig,
 	cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		models, _ := ml.ListModels()
+		models, _ := services.ListModels(cl, ml, "", true)
 		backendConfigs := cl.GetAllBackendConfigs()

 		galleryConfigs := map[string]*gallery.Config{}

@@ -28,10 +29,18 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
 		// Get model statuses to display in the UI the operation in progress
 		processingModels, taskTypes := modelStatus()

+		modelsWithoutConfig := []string{}
+
+		for _, m := range models {
+			if _, ok := galleryConfigs[m]; !ok {
+				modelsWithoutConfig = append(modelsWithoutConfig, m)
+			}
+		}
+
 		summary := fiber.Map{
 			"Title":         "LocalAI API - " + internal.PrintableVersion(),
 			"Version":       internal.PrintableVersion(),
-			"Models":        models,
+			"Models":        modelsWithoutConfig,
 			"ModelsConfig":  backendConfigs,
 			"GalleryConfig": galleryConfigs,
 			"IsP2PEnabled":  p2p.IsP2PEnabled(),
@@ -11,6 +11,8 @@ import (

 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/services"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"

@@ -79,7 +81,7 @@ func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
 			return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
 		}

-		if !modelExists(ml, request.Model) {
+		if !modelExists(cl, ml, request.Model) {
 			log.Warn().Msgf("Model: %s was not found in list of models.", request.Model)
 			return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found")
 		}

@@ -124,6 +126,14 @@ func generateRandomID() int64 {
 	return currentId
 }

+// ListAssistantsEndpoint is the OpenAI Assistant API endpoint to list assistents https://platform.openai.com/docs/api-reference/assistants/listAssistants
+// @Summary List available assistents
+// @Param limit query int false "Limit the number of assistants returned"
+// @Param order query string false "Order of assistants returned"
+// @Param after query string false "Return assistants created after the given ID"
+// @Param before query string false "Return assistants created before the given ID"
+// @Success 200 {object} []Assistant "Response"
+// @Router /v1/assistants [get]
 func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		// Because we're altering the existing assistants list we should just duplicate it for now.

@@ -213,9 +223,9 @@ func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant {
 	return filteredAssistants
 }

-func modelExists(ml *model.ModelLoader, modelName string) (found bool) {
+func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
 	found = false
-	models, err := ml.ListModels()
+	models, err := services.ListModels(cl, ml, "", true)
 	if err != nil {
 		return
 	}

@@ -229,13 +239,11 @@ func modelExists(ml *model.ModelLoader, modelName string) (found bool) {
 	return
 }

+// DeleteAssistantEndpoint is the OpenAI Assistant API endpoint to delete assistents https://platform.openai.com/docs/api-reference/assistants/deleteAssistant
+// @Summary Delete assistents
+// @Success 200 {object} schema.DeleteAssistantResponse "Response"
+// @Router /v1/assistants/{assistant_id} [delete]
 func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	type DeleteAssistantResponse struct {
-		ID      string `json:"id"`
-		Object  string `json:"object"`
-		Deleted bool   `json:"deleted"`
-	}
-
 	return func(c *fiber.Ctx) error {
 		assistantID := c.Params("assistant_id")
 		if assistantID == "" {

@@ -246,7 +254,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
 			if assistant.ID == assistantID {
 				Assistants = append(Assistants[:i], Assistants[i+1:]...)
 				utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
-				return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{
+				return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantResponse{
 					ID:      assistantID,
 					Object:  "assistant.deleted",
 					Deleted: true,

@@ -255,7 +263,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
 		}

 		log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID)
-		return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{
+		return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantResponse{
 			ID:      assistantID,
 			Object:  "assistant.deleted",
 			Deleted: false,

@@ -263,6 +271,10 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
 		}
 	}

+// GetAssistantEndpoint is the OpenAI Assistant API endpoint to get assistents https://platform.openai.com/docs/api-reference/assistants/getAssistant
+// @Summary Get assistent data
+// @Success 200 {object} Assistant "Response"
+// @Router /v1/assistants/{assistant_id} [get]
 func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		assistantID := c.Params("assistant_id")
|
assistantID := c.Params("assistant_id")
|
||||||
@@ -292,19 +304,9 @@ var (
|
|||||||
AssistantsFileConfigFile = "assistantsFile.json"
|
AssistantsFileConfigFile = "assistantsFile.json"
|
||||||
)
|
)
|
||||||
|
|
||||||
type AssistantFileRequest struct {
|
|
||||||
FileID string `json:"file_id"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type DeleteAssistantFileResponse struct {
|
|
||||||
ID string `json:"id"`
|
|
||||||
Object string `json:"object"`
|
|
||||||
Deleted bool `json:"deleted"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
request := new(AssistantFileRequest)
|
request := new(schema.AssistantFileRequest)
|
||||||
if err := c.BodyParser(request); err != nil {
|
if err := c.BodyParser(request); err != nil {
|
||||||
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
|
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
|
||||||
}
|
}
|
||||||
@@ -345,7 +347,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
|||||||
|
|
||||||
func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
type ListAssistantFiles struct {
|
type ListAssistantFiles struct {
|
||||||
Data []File
|
Data []schema.File
|
||||||
Object string
|
Object string
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -463,7 +465,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
|||||||
// Remove the file from the assistantFiles slice
|
// Remove the file from the assistantFiles slice
|
||||||
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
|
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
|
||||||
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
|
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
|
||||||
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
|
return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantFileResponse{
|
||||||
ID: fileId,
|
ID: fileId,
|
||||||
Object: "assistant.file.deleted",
|
Object: "assistant.file.deleted",
|
||||||
Deleted: true,
|
Deleted: true,
|
||||||
@@ -479,7 +481,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
|||||||
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
|
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
|
||||||
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
|
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
|
||||||
|
|
||||||
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
|
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
|
||||||
ID: fileId,
|
ID: fileId,
|
||||||
Object: "assistant.file.deleted",
|
Object: "assistant.file.deleted",
|
||||||
Deleted: true,
|
Deleted: true,
|
||||||
@@ -490,7 +492,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
|||||||
}
|
}
|
||||||
log.Warn().Msgf("Unable to find assistant: %s", assistantID)
|
log.Warn().Msgf("Unable to find assistant: %s", assistantID)
|
||||||
|
|
||||||
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
|
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
|
||||||
ID: fileId,
|
ID: fileId,
|
||||||
Object: "assistant.file.deleted",
|
Object: "assistant.file.deleted",
|
||||||
Deleted: false,
|
Deleted: false,
|
||||||
|
@@ -14,6 +14,7 @@ import (

 "github.com/gofiber/fiber/v2"
 "github.com/mudler/LocalAI/core/config"
+"github.com/mudler/LocalAI/core/schema"
 "github.com/mudler/LocalAI/pkg/model"
 "github.com/stretchr/testify/assert"
 )
@@ -26,7 +27,7 @@ type MockLoader struct {

 func tearDown() func() {
 return func() {
-UploadedFiles = []File{}
+UploadedFiles = []schema.File{}
 Assistants = []Assistant{}
 AssistantFiles = []AssistantFile{}
 _ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile))
@@ -294,7 +295,7 @@ func TestAssistantEndpoints(t *testing.T) {
 file, assistant, err := createFileAndAssistant(t, app, appConfig)
 assert.NoError(t, err)

-afr := AssistantFileRequest{FileID: file.ID}
+afr := schema.AssistantFileRequest{FileID: file.ID}
 af, _, err := createAssistantFile(app, afr, assistant.ID)

 assert.NoError(t, err)
@@ -305,7 +306,7 @@ func TestAssistantEndpoints(t *testing.T) {
 file, assistant, err := createFileAndAssistant(t, app, appConfig)
 assert.NoError(t, err)

-afr := AssistantFileRequest{FileID: file.ID}
+afr := schema.AssistantFileRequest{FileID: file.ID}
 af, _, err := createAssistantFile(app, afr, assistant.ID)
 assert.NoError(t, err)

@@ -316,7 +317,7 @@ func TestAssistantEndpoints(t *testing.T) {
 file, assistant, err := createFileAndAssistant(t, app, appConfig)
 assert.NoError(t, err)

-afr := AssistantFileRequest{FileID: file.ID}
+afr := schema.AssistantFileRequest{FileID: file.ID}
 af, _, err := createAssistantFile(app, afr, assistant.ID)
 assert.NoError(t, err)
 t.Cleanup(cleanupAssistantFile(t, app, af.ID, af.AssistantID))
@@ -338,7 +339,7 @@ func TestAssistantEndpoints(t *testing.T) {
 file, assistant, err := createFileAndAssistant(t, app, appConfig)
 assert.NoError(t, err)

-afr := AssistantFileRequest{FileID: file.ID}
+afr := schema.AssistantFileRequest{FileID: file.ID}
 af, _, err := createAssistantFile(app, afr, assistant.ID)
 assert.NoError(t, err)

@@ -349,7 +350,7 @@ func TestAssistantEndpoints(t *testing.T) {

 }

-func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (File, Assistant, error) {
+func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (schema.File, Assistant, error) {
 ar := &AssistantRequest{
 Model: "ggml-gpt4all-j",
 Name: "3.5-turbo",
@@ -362,7 +363,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC

 assistant, _, err := createAssistant(app, *ar)
 if err != nil {
-return File{}, Assistant{}, err
+return schema.File{}, Assistant{}, err
 }
 t.Cleanup(cleanupAllAssistants(t, app, []string{assistant.ID}))

@@ -374,7 +375,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
 return file, assistant, nil
 }

-func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
+func createAssistantFile(app *fiber.App, afr schema.AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
 afrJson, err := json.Marshal(afr)
 if err != nil {
 return AssistantFile{}, nil, err
@@ -451,7 +452,7 @@ func cleanupAssistantFile(t *testing.T, app *fiber.App, fileId, assistantId stri
 resp, err := app.Test(request)
 assert.NoError(t, err)

-var dafr DeleteAssistantFileResponse
+var dafr schema.DeleteAssistantFileResponse
 err = json.NewDecoder(resp.Body).Decode(&dafr)
 assert.NoError(t, err)
 assert.True(t, dafr.Deleted)
@@ -159,7 +159,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 }

 return func(c *fiber.Ctx) error {
-modelFile, input, err := readRequest(c, ml, startupOptions, true)
+modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
 if err != nil {
 return fmt.Errorf("failed reading parameters from request:%w", err)
 }
@@ -225,18 +225,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 }

 // Update input grammar
-// Handle if we should return "name" instead of "functions"
-if config.FunctionsConfig.FunctionName {
-jsStruct := funcs.ToJSONNameStructure()
+jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
 config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-} else {
-jsStruct := funcs.ToJSONFunctionStructure()
-config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-}
 case input.JSONFunctionGrammarObject != nil:
 config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-case input.JSONFunctionGrammarObjectName != nil:
-config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 default:
 // Force picking one of the functions by the request
 if config.FunctionToCall() != "" {
@@ -57,7 +57,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 }

 return func(c *fiber.Ctx) error {
-modelFile, input, err := readRequest(c, ml, appConfig, true)
+modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 if err != nil {
 return fmt.Errorf("failed reading parameters from request:%w", err)
 }
@@ -16,9 +16,14 @@ import (
 "github.com/rs/zerolog/log"
 )

+// EditEndpoint is the OpenAI edit API endpoint
+// @Summary OpenAI edit endpoint
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/edits [post]
 func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 return func(c *fiber.Ctx) error {
-modelFile, input, err := readRequest(c, ml, appConfig, true)
+modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 if err != nil {
 return fmt.Errorf("failed reading parameters from request:%w", err)
 }
@@ -23,7 +23,7 @@ import (
 // @Router /v1/embeddings [post]
 func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 return func(c *fiber.Ctx) error {
-model, input, err := readRequest(c, ml, appConfig, true)
+model, input, err := readRequest(c, cl, ml, appConfig, true)
 if err != nil {
 return fmt.Errorf("failed reading parameters from request:%w", err)
 }
@@ -9,25 +9,16 @@ import (
 "time"

 "github.com/mudler/LocalAI/core/config"
+"github.com/mudler/LocalAI/core/schema"

 "github.com/gofiber/fiber/v2"
 "github.com/mudler/LocalAI/pkg/utils"
 )

-var UploadedFiles []File
+var UploadedFiles []schema.File

 const UploadedFilesFile = "uploadedFiles.json"

-// File represents the structure of a file object from the OpenAI API.
-type File struct {
-ID string `json:"id"` // Unique identifier for the file
-Object string `json:"object"` // Type of the object (e.g., "file")
-Bytes int `json:"bytes"` // Size of the file in bytes
-CreatedAt time.Time `json:"created_at"` // The time at which the file was created
-Filename string `json:"filename"` // The name of the file
-Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
-}
-
 // UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create
 func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 return func(c *fiber.Ctx) error {
@@ -61,7 +52,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
 return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error())
 }

-f := File{
+f := schema.File{
 ID: fmt.Sprintf("file-%d", getNextFileId()),
 Object: "file",
 Bytes: int(file.Size),
@@ -84,14 +75,13 @@ func getNextFileId() int64 {
 }

 // ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list
+// @Summary List files.
+// @Success 200 {object} schema.ListFiles "Response"
+// @Router /v1/files [get]
 func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-type ListFiles struct {
-Data []File
-Object string
-}
-
 return func(c *fiber.Ctx) error {
-var listFiles ListFiles
+var listFiles schema.ListFiles

 purpose := c.Query("purpose")
 if purpose == "" {
@@ -108,7 +98,7 @@ func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applica
 }
 }

-func getFileFromRequest(c *fiber.Ctx) (*File, error) {
+func getFileFromRequest(c *fiber.Ctx) (*schema.File, error) {
 id := c.Params("file_id")
 if id == "" {
 return nil, fmt.Errorf("file_id parameter is required")
@@ -125,7 +115,7 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {

 // GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
 // @Summary Returns information about a specific file.
-// @Success 200 {object} File "Response"
+// @Success 200 {object} schema.File "Response"
 // @Router /v1/files/{file_id} [get]
 func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 return func(c *fiber.Ctx) error {
@@ -14,6 +14,7 @@ import (
 "github.com/rs/zerolog/log"

 "github.com/mudler/LocalAI/core/config"
+"github.com/mudler/LocalAI/core/schema"

 "github.com/gofiber/fiber/v2"
 utils2 "github.com/mudler/LocalAI/pkg/utils"
@@ -22,11 +23,6 @@ import (
 "testing"
 )

-type ListFiles struct {
-Data []File
-Object string
-}
-
 func startUpApp() (app *fiber.App, option *config.ApplicationConfig, loader *config.BackendConfigLoader) {
 // Preparing the mocked objects
 loader = &config.BackendConfigLoader{}
@@ -159,7 +155,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
 resp, _ := app.Test(req)
 assert.Equal(t, 200, resp.StatusCode)

-var listFiles ListFiles
+var listFiles schema.ListFiles
 if err := json.Unmarshal(bodyToByteArray(resp, t), &listFiles); err != nil {
 t.Errorf("Failed to decode response: %v", err)
 return
@@ -201,7 +197,7 @@ func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpos
 return app.Test(req)
 }

-func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File {
+func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) schema.File {
 // Create a file that exceeds the limit
 testName := strings.Split(t.Name(), "/")[1]
 file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
@@ -280,8 +276,8 @@ func bodyToByteArray(resp *http.Response, t *testing.T) []byte {
 return bodyBytes
 }

-func responseToFile(t *testing.T, resp *http.Response) File {
-var file File
+func responseToFile(t *testing.T, resp *http.Response) schema.File {
+var file schema.File
 responseToString := bodyToString(resp, t)

 err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&file)
@@ -292,8 +288,8 @@ func responseToFile(t *testing.T, resp *http.Response) File {
 return file
 }

-func responseToListFile(t *testing.T, resp *http.Response) ListFiles {
-var listFiles ListFiles
+func responseToListFile(t *testing.T, resp *http.Response) schema.ListFiles {
+var listFiles schema.ListFiles
 responseToString := bodyToString(resp, t)

 err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)
@@ -66,7 +66,7 @@ func downloadFile(url string) (string, error) {
 // @Router /v1/images/generations [post]
 func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 return func(c *fiber.Ctx) error {
-m, input, err := readRequest(c, ml, appConfig, false)
+m, input, err := readRequest(c, cl, ml, appConfig, false)
 if err != nil {
 return fmt.Errorf("failed reading parameters from request:%w", err)
 }
@@ -2,15 +2,17 @@ package openai

 import (
 "github.com/gofiber/fiber/v2"
+"github.com/mudler/LocalAI/core/config"
 "github.com/mudler/LocalAI/core/schema"
 "github.com/mudler/LocalAI/core/services"
+model "github.com/mudler/LocalAI/pkg/model"
 )

 // ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
 // @Summary List and describe the various models available in the API.
 // @Success 200 {object} schema.ModelsDataResponse "Response"
 // @Router /v1/models [get]
-func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
+func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
 return func(c *fiber.Ctx) error {
 // If blank, no filter is applied.
 filter := c.Query("filter")
@@ -18,7 +20,7 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
 // By default, exclude any loose files that are already referenced by a configuration file.
 excludeConfigured := c.QueryBool("excludeConfigured", true)

-dataModels, err := lms.ListModels(filter, excludeConfigured)
+dataModels, err := modelList(bcl, ml, filter, excludeConfigured)
 if err != nil {
 return err
 }
@@ -28,3 +30,20 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
 })
 }
 }
+
+func modelList(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {
+
+models, err := services.ListModels(bcl, ml, filter, excludeConfigured)
+if err != nil {
+return nil, err
+}
+
+dataModels := []schema.OpenAIModel{}
+
+// Then iterate through the loose files:
+for _, m := range models {
+dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
+}
+
+return dataModels, nil
+}
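For orientation, the hunk above drops the temporary ListModelsService in favour of a direct services.ListModels call plus a small mapping helper. Below is a minimal, self-contained sketch of that mapping only; the OpenAIModel struct stands in for schema.OpenAIModel and the model names are invented for the example, so none of the identifiers here are the real LocalAI ones.

package main

import (
	"encoding/json"
	"fmt"
)

// OpenAIModel approximates the shape of schema.OpenAIModel (illustration only).
type OpenAIModel struct {
	ID     string `json:"id"`
	Object string `json:"object"`
}

// toOpenAIModels mirrors what the new modelList helper does with the names
// returned by services.ListModels: one entry per name, Object fixed to "model".
func toOpenAIModels(names []string) []OpenAIModel {
	out := []OpenAIModel{}
	for _, m := range names {
		out = append(out, OpenAIModel{ID: m, Object: "model"})
	}
	return out
}

func main() {
	// Hypothetical model names; in LocalAI they come from the config loader and the models directory.
	b, _ := json.Marshal(toOpenAIModels([]string{"phi-2", "llava"}))
	fmt.Println(string(b))
}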
@@ -15,7 +15,7 @@ import (
 "github.com/rs/zerolog/log"
 )

-func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
+func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
 input := new(schema.OpenAIRequest)

 // Get input data from the request body
@@ -31,7 +31,7 @@ func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfi

 log.Debug().Msgf("Request received: %s", string(received))

-modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel)
+modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)

 return modelFile, input, err
 }
@@ -25,7 +25,7 @@ import (
 // @Router /v1/audio/transcriptions [post]
 func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 return func(c *fiber.Ctx) error {
-m, input, err := readRequest(c, ml, appConfig, false)
+m, input, err := readRequest(c, cl, ml, appConfig, false)
 if err != nil {
 return fmt.Errorf("failed reading parameters from request:%w", err)
 }
@@ -59,16 +59,8 @@ func RegisterLocalAIRoutes(app *fiber.App,

 // p2p
 if p2p.IsP2PEnabled() {
-app.Get("/api/p2p", auth, func(c *fiber.Ctx) error {
-// Render index
-return c.JSON(map[string]interface{}{
-"Nodes": p2p.GetAvailableNodes(""),
-"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
-})
-})
-app.Get("/api/p2p/token", auth, func(c *fiber.Ctx) error {
-return c.Send([]byte(appConfig.P2PToken))
-})
+app.Get("/api/p2p", auth, localai.ShowP2PNodes)
+app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
 }

 app.Get("/version", auth, func(c *fiber.Ctx) error {
@@ -5,7 +5,6 @@ import (
 "github.com/mudler/LocalAI/core/config"
 "github.com/mudler/LocalAI/core/http/endpoints/localai"
 "github.com/mudler/LocalAI/core/http/endpoints/openai"
-"github.com/mudler/LocalAI/core/services"
 "github.com/mudler/LocalAI/pkg/model"
 )

@@ -81,8 +80,7 @@ func RegisterOpenAIRoutes(app *fiber.App,
 app.Static("/generated-audio", appConfig.AudioDir)
 }

-// models
-tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.
-app.Get("/v1/models", auth, openai.ListModelsEndpoint(tmpLMS))
-app.Get("/models", auth, openai.ListModelsEndpoint(tmpLMS))
+// List models
+app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
+app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
 }
@@ -27,7 +27,6 @@ func RegisterUIRoutes(app *fiber.App,
 appConfig *config.ApplicationConfig,
 galleryService *services.GalleryService,
 auth func(*fiber.Ctx) error) {
-tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.

 // keeps the state of models that are being installed from the UI
 var processingModels = xsync.NewSyncedMap[string, string]()
@@ -270,7 +269,7 @@ func RegisterUIRoutes(app *fiber.App,

 // Show the Chat page
 app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
-backendConfigs, _ := tmpLMS.ListModels("", true)
+backendConfigs, _ := services.ListModels(cl, ml, "", true)

 summary := fiber.Map{
 "Title": "LocalAI - Chat with " + c.Params("model"),
@@ -285,7 +284,7 @@ func RegisterUIRoutes(app *fiber.App,
 })

 app.Get("/talk/", auth, func(c *fiber.Ctx) error {
-backendConfigs, _ := tmpLMS.ListModels("", true)
+backendConfigs, _ := services.ListModels(cl, ml, "", true)

 if len(backendConfigs) == 0 {
 // If no model is available redirect to the index which suggests how to install models
@@ -295,7 +294,7 @@ func RegisterUIRoutes(app *fiber.App,
 summary := fiber.Map{
 "Title": "LocalAI - Talk",
 "ModelsConfig": backendConfigs,
-"Model": backendConfigs[0].ID,
+"Model": backendConfigs[0],
 "IsP2PEnabled": p2p.IsP2PEnabled(),
 "Version": internal.PrintableVersion(),
 }
@@ -306,7 +305,7 @@ func RegisterUIRoutes(app *fiber.App,

 app.Get("/chat/", auth, func(c *fiber.Ctx) error {

-backendConfigs, _ := tmpLMS.ListModels("", true)
+backendConfigs, _ := services.ListModels(cl, ml, "", true)

 if len(backendConfigs) == 0 {
 // If no model is available redirect to the index which suggests how to install models
@@ -314,9 +313,9 @@ func RegisterUIRoutes(app *fiber.App,
 }

 summary := fiber.Map{
-"Title": "LocalAI - Chat with " + backendConfigs[0].ID,
+"Title": "LocalAI - Chat with " + backendConfigs[0],
 "ModelsConfig": backendConfigs,
-"Model": backendConfigs[0].ID,
+"Model": backendConfigs[0],
 "Version": internal.PrintableVersion(),
 "IsP2PEnabled": p2p.IsP2PEnabled(),
 }
@@ -100,10 +100,10 @@ SOFTWARE.
 <option value="" disabled class="text-gray-400" >Select a model</option>
 {{ $model:=.Model}}
 {{ range .ModelsConfig }}
-{{ if eq .ID $model }}
-<option value="/chat/{{.ID}}" selected class="bg-gray-700 text-white">{{.ID}}</option>
+{{ if eq . $model }}
+<option value="/chat/{{.}}" selected class="bg-gray-700 text-white">{{.}}</option>
 {{ else }}
-<option value="/chat/{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
+<option value="/chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
 {{ end }}
 {{ end }}
 </select>
@@ -17,15 +17,26 @@
 </div>

 <div class="models mt-4">

 {{template "views/partials/inprogress" .}}

 {{ if eq (len .ModelsConfig) 0 }}
-<h2 class="text-center text-3xl font-semibold text-gray-100"> <i class="text-yellow-200 ml-2 fa-solid fa-triangle-exclamation animate-pulse"></i> Ouch! seems you don't have any models installed!</h2>
+<h2 class="text-center text-3xl font-semibold text-gray-100"> <i class="text-yellow-200 ml-2 fa-solid fa-triangle-exclamation animate-pulse"></i> Ouch! seems you don't have any models installed from the LocalAI gallery!</h2>
 <p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="/browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>

+{{ if ne (len .Models) 0 }}
+<hr class="my-4">
+<h3 class="text-center text-xl font-semibold text-gray-100">
+However, It seems you have installed some models installed without a configuration file:
+</h3>
+{{ range .Models }}
+<div class="bg-gray-800 border-b border-gray-700 p-4 mt-4">
+<h4 class="text-md font-bold text-gray-200">{{.}}</h4>
+</div>
+{{end}}
+{{end}}
 {{ else }}
-<h2 class="text-center text-3xl font-semibold text-gray-100">Installed models</h2>
-<p class="text-center mt-4 text-xl">We have {{len .ModelsConfig}} pre-loaded models available.</p>
+{{ $modelsN := len .ModelsConfig}}
+{{ $modelsN = add $modelsN (len .Models)}}
+<h2 class="text-center text-3xl font-semibold text-gray-100">{{$modelsN}} Installed model(s)</h2>
 <table class="table-auto mt-4 w-full text-left text-gray-200">
 <thead class="text-xs text-gray-400 uppercase bg-gray-700">
 <tr>
@@ -76,12 +87,29 @@
 data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="/browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
 </td>
 {{ end }}
+{{ range .Models }}
+<tr class="bg-gray-800 border-b border-gray-700">
+<td class="px-4 py-3">
+<img src="{{$noicon}}" class="rounded-t-lg max-h-24 max-w-24 object-cover mt-3">
+</td>
+<td class="px-4 py-3 font-bold">
+<p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i>{{.}}</p>
+</td>
+<td class="px-4 py-3 font-bold">
+<span class="inline-block bg-yellow-500 text-white py-1 px-3 rounded-full text-xs">
+auto
+</span>
+</td>
+
+<td class="px-4 py-3">
+<span class="float-right inline-block bg-red-800 text-white py-1 px-3 rounded-full text-xs">
+No Configuration
+</span>
+</td>
+{{end}}
 </tbody>
 </table>
 {{ end }}


 </div>
 </div>

@@ -62,7 +62,7 @@
 <option value="" disabled class="text-gray-400" >Select a model</option>

 {{ range .ModelsConfig }}
-<option value="{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
+<option value="{{.}}" class="bg-gray-700 text-white">{{.}}</option>
 {{ end }}
 </select>
 </div>
@@ -76,7 +76,7 @@
 <option value="" disabled class="text-gray-400" >Select a model</option>

 {{ range .ModelsConfig }}
-<option value="{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
+<option value="{{.}}" class="bg-gray-700 text-white">{{.}}</option>
 {{ end }}
 </select>
 </div>
@@ -89,7 +89,7 @@
 >
 <option value="" disabled class="text-gray-400" >Select a model</option>
 {{ range .ModelsConfig }}
-<option value="{{.ID}}" class="bg-gray-700 text-white">{{.ID}}</option>
+<option value="{{.}}" class="bg-gray-700 text-white">{{.}}</option>
 {{ end }}
 </select>
 </div>
core/p2p/federated.go (new file, 47 lines)
@@ -0,0 +1,47 @@
+package p2p
+
+const FederatedID = "federated"
+
+type FederatedServer struct {
+listenAddr, service, p2ptoken string
+requestTable map[string]int
+loadBalanced bool
+}
+
+func NewFederatedServer(listenAddr, service, p2pToken string, loadBalanced bool) *FederatedServer {
+return &FederatedServer{
+listenAddr: listenAddr,
+service: service,
+p2ptoken: p2pToken,
+requestTable: map[string]int{},
+loadBalanced: loadBalanced,
+}
+}
+
+func (fs *FederatedServer) SelectLeastUsedServer() string {
+// cycle over requestTable and find the entry with the lower number
+// if there are multiple entries with the same number, select one randomly
+// if there are no entries, return an empty string
+var min int
+var minKey string
+for k, v := range fs.requestTable {
+if min == 0 || v < min {
+min = v
+minKey = k
+}
+}
+return minKey
+}
+
+func (fs *FederatedServer) RecordRequest(nodeID string) {
+// increment the counter for the nodeID in the requestTable
+fs.requestTable[nodeID]++
+}
+
+func (fs *FederatedServer) EnsureRecordExist(nodeID string) {
+// if the nodeID is not in the requestTable, add it with a counter of 0
+_, ok := fs.requestTable[nodeID]
+if !ok {
+fs.requestTable[nodeID] = 0
+}
+}
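The new FederatedServer above keeps a per-tunnel request counter and, when load balancing is enabled, routes each incoming connection to the tunnel with the lowest counter. A standalone sketch of that bookkeeping follows; the map literal and addresses are made-up stand-ins, and selectLeastUsed simply mirrors the loop in SelectLeastUsedServer (the `min == 0` guard means zero-count entries restart the comparison, and the proxy shown further down falls back to a random pick only if the result is empty).

package main

import "fmt"

// selectLeastUsed mirrors FederatedServer.SelectLeastUsedServer: walk the request
// table, favouring entries whose counter is zero or lower than the minimum seen so far.
func selectLeastUsed(requestTable map[string]int) string {
	var min int
	var minKey string
	for k, v := range requestTable {
		if min == 0 || v < min {
			min = v
			minKey = k
		}
	}
	return minKey
}

func main() {
	// Hypothetical tunnel addresses; in LocalAI these come from GetAvailableNodes.
	table := map[string]int{"127.0.0.1:10001": 0, "127.0.0.1:10002": 0}
	for i := 0; i < 4; i++ {
		addr := selectLeastUsed(table)
		table[addr]++ // the equivalent of RecordRequest
		fmt.Printf("request %d -> %s\n", i, addr)
	}
}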
core/p2p/federated_server.go (new file, 140 lines)
@@ -0,0 +1,140 @@
+//go:build p2p
+// +build p2p
+
+package p2p
+
+import (
+"context"
+"errors"
+"fmt"
+"net"
+"time"
+
+"math/rand/v2"
+
+"github.com/mudler/edgevpn/pkg/node"
+"github.com/mudler/edgevpn/pkg/protocol"
+"github.com/mudler/edgevpn/pkg/types"
+"github.com/rs/zerolog/log"
+)
+
+func (f *FederatedServer) Start(ctx context.Context) error {
+
+n, err := NewNode(f.p2ptoken)
+if err != nil {
+return fmt.Errorf("creating a new node: %w", err)
+}
+err = n.Start(ctx)
+if err != nil {
+return fmt.Errorf("creating a new node: %w", err)
+}
+
+if err := ServiceDiscoverer(ctx, n, f.p2ptoken, f.service, func(servicesID string, tunnel NodeData) {
+log.Debug().Msgf("Discovered node: %s", tunnel.ID)
+}); err != nil {
+return err
+}
+
+return f.proxy(ctx, n)
+}
+
+func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
+
+log.Info().Msgf("Allocating service '%s' on: %s", fs.service, fs.listenAddr)
+// Open local port for listening
+l, err := net.Listen("tcp", fs.listenAddr)
+if err != nil {
+log.Error().Err(err).Msg("Error listening")
+return err
+}
+// ll.Info("Binding local port on", srcaddr)
+
+ledger, _ := node.Ledger()
+
+// Announce ourselves so nodes accepts our connection
+ledger.Announce(
+ctx,
+10*time.Second,
+func() {
+// Retrieve current ID for ip in the blockchain
+//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
+// If mismatch, update the blockchain
+//if !found {
+updatedMap := map[string]interface{}{}
+updatedMap[node.Host().ID().String()] = &types.User{
+PeerID: node.Host().ID().String(),
+Timestamp: time.Now().String(),
+}
+ledger.Add(protocol.UsersLedgerKey, updatedMap)
+// }
+},
+)
+
+defer l.Close()
+for {
+select {
+case <-ctx.Done():
+return errors.New("context canceled")
+default:
+log.Debug().Msg("New for connection")
+// Listen for an incoming connection.
+conn, err := l.Accept()
+if err != nil {
+fmt.Println("Error accepting: ", err.Error())
+continue
+}
+
+// Handle connections in a new goroutine, forwarding to the p2p service
+go func() {
+var tunnelAddresses []string
+for _, v := range GetAvailableNodes(fs.service) {
+if v.IsOnline() {
+tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
+} else {
+log.Info().Msgf("Node %s is offline", v.ID)
+}
+}
+
+if len(tunnelAddresses) == 0 {
+log.Error().Msg("No available nodes yet")
+return
+}
+
+tunnelAddr := ""
+
+if fs.loadBalanced {
+for _, t := range tunnelAddresses {
+fs.EnsureRecordExist(t)
+}
+
+tunnelAddr = fs.SelectLeastUsedServer()
+log.Debug().Msgf("Selected tunnel %s", tunnelAddr)
+if tunnelAddr == "" {
+tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
+}
+
+fs.RecordRequest(tunnelAddr)
+} else {
+tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
+}
+
+tunnelConn, err := net.Dial("tcp", tunnelAddr)
+if err != nil {
+log.Error().Err(err).Msg("Error connecting to tunnel")
+return
+}
+
+log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
+closer := make(chan struct{}, 2)
+go copyStream(closer, tunnelConn, conn)
+go copyStream(closer, conn, tunnelConn)
+<-closer
+
+tunnelConn.Close()
+conn.Close()
+// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
+}()
+}
+}
+
+}
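For context, this is roughly how a caller would wire up the federated server introduced above. It is only a sketch: Start lives behind the `p2p` build tag shown in the file header, so the program must be built with that tag, and the listen address and token below are placeholders (the token would normally come from a running LocalAI instance, e.g. the /api/p2p/token endpoint registered earlier in this diff).

package main

import (
	"context"
	"log"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// Placeholder address and token for illustration only.
	fs := p2p.NewFederatedServer("0.0.0.0:8080", p2p.FederatedID, "<p2p-token>", true)

	// Start blocks: it joins the p2p network, discovers workers advertising the
	// federated service, and proxies incoming TCP connections to one of them.
	if err := fs.Start(context.Background()); err != nil {
		log.Fatal(err)
	}
}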
@@ -6,7 +6,6 @@ import (
 )

 const defaultServicesID = "services_localai"
-const FederatedID = "federated"

 type NodeData struct {
 Name string
Some files were not shown because too many files have changed in this diff.