Compare commits

1 commit

Author: Ettore Di Giacinto
SHA1: 95f773ee4b
Message: experiment: build with a single image with all the deps
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Date: 2024-07-01 19:43:18 +02:00

43 changed files with 241 additions and 1822 deletions

.github/release.yml vendored

@@ -13,9 +13,6 @@ changelog:
labels:
- bug
- regression
- title: "🖧 P2P area"
labels:
- area/p2p
- title: Exciting New Features 🎉
labels:
- Semver-Minor


@@ -46,7 +46,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'


@@ -64,7 +64,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -274,7 +274,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +296,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
cuda-minor-version: "8"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "4"
cuda-minor-version: "5"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'


@@ -23,7 +23,7 @@ on:
type: string
cuda-minor-version:
description: 'CUDA minor version'
default: "4"
default: "5"
type: string
platforms:
description: 'Platforms'


@@ -1,170 +0,0 @@
name: Notifications for new models
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: Eomm/why-don-t-you-tweet@v2
with:
tweet-message: ${{ steps.summarize.outputs.message }}
env:
# Get your tokens from https://developer.twitter.com/apps
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
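
Both deleted notification jobs work the same way: they start a throwaway LocalAI container, POST the PR diff to its OpenAI-style chat completions endpoint, and pull `.choices[0].message.content` out of the response with jq before forwarding it to Discord or Twitter. For reference, a minimal Go sketch of that call is below; the endpoint path, payload shape, and response field are copied from the workflow above, while the prompt text is a placeholder.

```go
// Minimal sketch of the chat-completions request the deleted workflows make.
// Assumes a LocalAI instance on localhost:8080 with the named model loaded;
// the prompt contents are placeholders.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Model    string    `json:"model"`
	Messages []message `json:"messages"`
}

type chatResponse struct {
	Choices []struct {
		Message message `json:"message"`
	} `json:"choices"`
}

func main() {
	payload, _ := json.Marshal(chatRequest{
		Model: "hermes-2-theta-llama-3-8b", // MODEL_NAME in the workflow
		Messages: []message{
			{Role: "system", Content: "Write a short notification message about the new model in this git diff."},
			{Role: "user", Content: "<diff text goes here>"},
		},
	})

	// Same endpoint the workflow curls.
	resp, err := http.Post("http://localhost:8080/chat/completions", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out chatResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	if len(out.Choices) > 0 {
		fmt.Println(out.Choices[0].Message.Content) // what the workflow posts to Discord/Twitter
	}
}
```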


@@ -1,65 +0,0 @@
name: Release notifications
on:
release:
types:
- published
jobs:
notify-discord:
runs-on: ubuntu-latest
env:
RELEASE_BODY: ${{ github.event.release.body }}
RELEASE_TITLE: ${{ github.event.release.name }}
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
steps:
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
- name: Summarize
id: summarize
run: |
input="$RELEASE_TITLE\b$RELEASE_BODY"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a discord message with a bullet point summary of the release notes."
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI API
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary=$(echo $response | jq -r '.choices[0].message.content')
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}


@@ -31,7 +31,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- name: Install CUDA Dependencies
run: |
@@ -40,7 +40,7 @@ jobs:
sudo apt-get update
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
env:
CUDA_VERSION: 12-4
CUDA_VERSION: 12-5
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
@@ -77,16 +77,6 @@ jobs:
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
GRPC_DIR=$PWD/grpc
# http://google.github.io/googletest/quickstart-cmake.html
# Seems otherwise cross-arch fails to find it
echo "include(FetchContent)" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_Declare(" >> $GRPC_DIR/CMakeLists.txt
echo " googletest" >> $GRPC_DIR/CMakeLists.txt
echo " URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip" >> $GRPC_DIR/CMakeLists.txt
echo ")" >> $GRPC_DIR/CMakeLists.txt
echo "FetchContent_MakeAvailable(googletest)" >> $GRPC_DIR/CMakeLists.txt
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
mkdir -p $GRPC_CROSS_BUILD_DIR && \
@@ -106,7 +96,6 @@ jobs:
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
@@ -114,6 +103,7 @@ jobs:
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
GO_TAGS=p2p \
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
GOOS=linux \
GOARCH=arm64 \
@@ -157,7 +147,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -224,7 +214,8 @@ jobs:
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
GO_TAGS=p2p \
BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
make -j4 dist
- uses: actions/upload-artifact@v4
with:
@@ -278,48 +269,6 @@ jobs:
files: |
release/*
build-macOS-x86_64:
runs-on: macos-13
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-x86_64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-arm64:
runs-on: macos-14
steps:
@@ -343,7 +292,7 @@ jobs:
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
make dist
BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-arm64


@@ -5,6 +5,12 @@ ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements-core
# TODO(mudler): install all accelerators here
# and use make dist instead of build.
# TODO(mudler): modify make dist to build also go-piper and stablediffusion
# This way the same binary can work for everything(!)
# TODO(mudler): also make sure that we bundle all the required libs in the backend-assets/lib
# For the GPU-accelerated builds we are going to generate a tar file instead that will be extracted by the bash installer, and the libs will also be installed in the final docker image, so no need to pull ALL the dependencies
USER root
@@ -49,10 +55,12 @@ ENV PATH /usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH /opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
# OpenBLAS requirements and stable diffusion, tts (espeak)
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev \
espeak-ng \
espeak \
libopencv-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -77,8 +85,6 @@ ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get update && \
apt-get install -y --no-install-recommends \
espeak-ng \
espeak \
python3-pip \
python-is-python3 \
python3-dev \
@@ -93,13 +99,12 @@ RUN pip install --user grpcio-tools
###################################
###################################
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
# Base image for the build-type.
FROM requirements-${IMAGE_TYPE} AS run-requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=4
ARG CUDA_MINOR_VERSION=5
ENV BUILD_TYPE=${BUILD_TYPE}
@@ -186,6 +191,82 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
ldconfig \
; fi
# The build-requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# This target will be built on top of requirements-core or requirements-extras as determined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS build-requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=5
ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
EOT
# CuBLAS requirements
RUN <<EOT bash
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
EOT
# clblas
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# intel
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && apt update && apt install -y intel-basekit && apt-get clean && \
rm -rf /var/lib/apt/lists/*
# hipblas
RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && apt-get update && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.1.2/ubuntu jammy main" \
| tee /etc/apt/sources.list.d/amdgpu.list && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 && \
apt update && \
apt-get install -y --no-install-recommends \
hipblas-dev rocm-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig
###################################
###################################
@@ -237,7 +318,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM requirements-drivers AS builder
FROM build-requirements-drivers AS builder
ARG GO_TAGS="stablediffusion tts p2p"
ARG GRPC_BACKENDS
@@ -282,9 +363,8 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
## Build the binary
RUN make build
# Need to build tts and stablediffusion separately first (?)
RUN make dist && rm release/*.sha256 && mv release/* local-ai
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
@@ -296,7 +376,7 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
# This is the final target. The result of this target will be the image uploaded to the registry.
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers
FROM run-requirements-drivers
ARG FFMPEG
ARG BUILD_TYPE
@@ -341,6 +421,7 @@ RUN make prepare-sources
COPY --from=builder /build/local-ai ./
# Copy shared libraries for piper
# TODO(mudler): bundle these libs in backend-assets/lib/ (like we do for llama.cpp deps)
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)


@@ -3,11 +3,9 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=c4dd11d1d3903e1922c06242e189f6310fc4d8c3
CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -18,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_CPP_VERSION?=1c31f9d4a8936aec550e6c4dc9ca5cae3b4f304a
WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f
# bert.cpp version
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
@@ -35,11 +33,9 @@ TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
export BACKEND_LIBS?=
CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
@@ -53,8 +49,8 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
OPTIONAL_TARGETS?=
@@ -86,25 +82,24 @@ ifeq ($(OS),Darwin)
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
endif
ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate
endif
else
CGO_LDFLAGS_WHISPER+=-lgomp
endif
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
export GGML_OPENBLAS=1
export WHISPER_OPENBLAS=1
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export GGML_CUDA=1
export WHISPER_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif
@@ -112,14 +107,6 @@ ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1
endif
ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1
endif
ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
@@ -128,7 +115,7 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIPBLAS=1
export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
@@ -138,16 +125,17 @@ endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export GGML_METAL=1
export WHISPER_METAL=1
endif
ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast
export GGML_OPENBLAS=1
export WHISPER_CLBLAST=1
endif
# glibc-static or glibc-devel-static required
ifeq ($(STATIC),true)
LD_FLAGS+=-linkmode external -extldflags -static
LD_FLAGS=-linkmode external -extldflags -static
endif
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
@@ -181,8 +169,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/...
@@ -262,7 +248,7 @@ sources/whisper.cpp:
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
cd sources/whisper.cpp && $(MAKE) libwhisper.a
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
@@ -333,7 +319,7 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
cp $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
@@ -348,22 +334,17 @@ backend-assets/lib:
dist:
$(MAKE) backend-assets/grpc/llama-cpp-avx2
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
endif
ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
ifneq ($(ARCH),arm64)
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif
GO_TAGS="tts p2p" $(MAKE) build
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/piper
endif
GO_TAGS="tts p2p" STATIC=true $(MAKE) build
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
@@ -374,7 +355,7 @@ else
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif
dist-cross-linux-arm64:
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
@@ -425,7 +406,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -813,7 +794,7 @@ backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libti
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
backend-assets/grpc/local-store: backend-assets/grpc
@@ -882,7 +863,7 @@ gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
## Documentation
docs/layouts/_default:
docs/layouts/_default:
mkdir -p docs/layouts/_default
docs/static/gallery.html: docs/layouts/_default
@@ -897,4 +878,4 @@ docs-clean:
.PHONY: docs
docs: docs/static/gallery.html
cd docs && hugo serve
cd docs && hugo serve
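
The `override LD_FLAGS += -X ...` lines in the hunk above stamp the version and commit into the binary at link time; the `# go tool nm ./local-ai | grep Commit` comment shows how to inspect the result. A minimal sketch of the receiving side of that pattern is below: the variable names match the `-X` targets in the Makefile, while the helper function is hypothetical.

```go
// Sketch of the -X link-flag pattern used by LD_FLAGS: package-level string
// variables are overwritten at link time, so no source change is needed per
// release. The defaults below only apply to a plain `go build` without the
// Makefile; the String helper is hypothetical.
package internal

import "fmt"

var (
	Version = "dev" // set via -X "<module>/internal.Version=$(git describe ...)"
	Commit  = ""    // set via -X "<module>/internal.Commit=$(git rev-parse HEAD)"
)

func String() string {
	return fmt.Sprintf("LocalAI %s (commit %s)", Version, Commit)
}
```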


@@ -72,7 +72,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
@@ -105,7 +104,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🌍 Integrated WebUI!
## 💻 Usage


@@ -83,11 +83,4 @@ target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
add_dependencies(${TARGET} BUILD_INFO)
endif()
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
FetchContent_MakeAvailable(googletest)
endif()


@@ -71,9 +71,9 @@ clean: purge
grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
else
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
endif
cp llama.cpp/build/bin/grpc-server .


@@ -148,13 +148,13 @@ function startBackend() {
ensureVenv
if [ ! -z ${BACKEND_FILE} ]; then
exec python ${BACKEND_FILE} $@
python ${BACKEND_FILE} $@
elif [ -e "${MY_DIR}/server.py" ]; then
exec python ${MY_DIR}/server.py $@
python ${MY_DIR}/server.py $@
elif [ -e "${MY_DIR}/backend.py" ]; then
exec python ${MY_DIR}/backend.py $@
python ${MY_DIR}/backend.py $@
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
exec python ${MY_DIR}/${BACKEND_NAME}.py $@
python ${MY_DIR}/${BACKEND_NAME}.py $@
fi
}
@@ -210,4 +210,4 @@ function checkTargets() {
echo false
}
init
init


@@ -9,7 +9,6 @@ var CLI struct {
cliContext.Context `embed:""`
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`


@@ -1,130 +0,0 @@
package cli
import (
"context"
"errors"
"fmt"
"io"
"net"
"time"
"math/rand/v2"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/types"
"github.com/rs/zerolog/log"
)
type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
}
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
n, err := p2p.NewNode(f.Peer2PeerToken)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(context.Background())
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
if err := p2p.ServiceDiscoverer(context.Background(), n, f.Peer2PeerToken, p2p.FederatedID, nil); err != nil {
return err
}
return Proxy(context.Background(), n, f.Address, p2p.FederatedID)
}
func Proxy(ctx context.Context, node *node.Node, listenAddr, service string) error {
log.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
// Open local port for listening
l, err := net.Listen("tcp", listenAddr)
if err != nil {
log.Error().Err(err).Msg("Error listening")
return err
}
// ll.Info("Binding local port on", srcaddr)
ledger, _ := node.Ledger()
// Announce ourselves so nodes accepts our connection
ledger.Announce(
ctx,
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
// }
},
)
defer l.Close()
for {
select {
case <-ctx.Done():
return errors.New("context canceled")
default:
log.Debug().Msg("New for connection")
// Listen for an incoming connection.
conn, err := l.Accept()
if err != nil {
fmt.Println("Error accepting: ", err.Error())
continue
}
// Handle connections in a new goroutine, forwarding to the p2p service
go func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.FederatedID) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
// open a TCP stream to one of the tunnels
// chosen randomly
// TODO: optimize this and track usage
tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
tunnelConn, err := net.Dial("tcp", tunnelAddr)
if err != nil {
log.Error().Err(err).Msg("Error connecting to tunnel")
return
}
log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
closer := make(chan struct{}, 2)
go copyStream(closer, tunnelConn, conn)
go copyStream(closer, conn, tunnelConn)
<-closer
tunnelConn.Close()
conn.Close()
// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
}()
}
}
}
func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
io.Copy(dst, src)
}


@@ -3,8 +3,6 @@ package cli
import (
"context"
"fmt"
"net"
"os"
"strings"
"time"
@@ -52,7 +50,7 @@ type RunCMD struct {
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
@@ -61,7 +59,6 @@ type RunCMD struct {
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
}
func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -94,10 +91,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithOpaqueErrors(r.OpaqueErrors),
}
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled")
token = r.Peer2PeerToken
token := r.Peer2PeerToken
if token == "" {
// IF no token is provided, and p2p is enabled,
// we generate one and wait for the user to pick up the token (this is for interactive)
@@ -108,46 +104,14 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
}
opts = append(opts, config.WithP2PToken(token))
node, err := p2p.NewNode(token)
if err != nil {
return err
// Ask for user confirmation
log.Info().Msg("Press a button to proceed")
var input string
fmt.Scanln(&input)
}
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes("") {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}); err != nil {
return err
}
}
if r.Federated {
_, port, err := net.SplitHostPort(r.Address)
if err != nil {
return err
}
if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil {
return err
}
node, err := p2p.NewNode(token)
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil {
if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
return err
}
}


@@ -20,7 +20,7 @@ import (
type P2P struct {
WorkerFlags `embed:""`
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
Token string `env:"LOCALAI_TOKEN,TOKEN" help:"JSON list of galleries"`
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
@@ -59,7 +59,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
p = r.RunnerPort
}
err = p2p.ExposeService(context.Background(), address, p, r.Token, "")
err = p2p.BindLLamaCPPWorker(context.Background(), address, p, r.Token)
if err != nil {
return err
}
@@ -99,7 +99,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
}
}()
err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "")
err = p2p.BindLLamaCPPWorker(context.Background(), address, fmt.Sprint(port), r.Token)
if err != nil {
return err
}
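
The worker command above is the counterpart of the discovery logic changed in run.go: one process advertises a local llama.cpp rpc server over the p2p network, another discovers the online nodes and hands their tunnel addresses to llama.cpp through LLAMACPP_GRPC_SERVERS. The sketch below is reconstructed only from call sites visible in this diff (ExposeService, NewNode, ServiceDiscoverer, GetAvailableNodes); this very commit swaps some of them for BindLLamaCPPWorker and LLamaCPPRPCServerDiscoverer, so treat it as illustrative of one side of the change, with the address, port, and ROLE switch as placeholders.

```go
// Illustrative pairing of the p2p worker/runner calls seen in this diff.
// Signatures are copied from the call sites above and may not match the
// package after this commit (which renames ExposeService and the discoverer).
package main

import (
	"context"
	"log"
	"os"
	"os/signal"
	"strings"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	token := os.Getenv("TOKEN")
	ctx := context.Background()

	if os.Getenv("ROLE") == "worker" {
		// Worker side: advertise a local llama.cpp rpc server (placeholder address/port).
		if err := p2p.ExposeService(ctx, "127.0.0.1", "50052", token, ""); err != nil {
			log.Fatal(err)
		}
	} else {
		// Runner side: discover online workers and point llama.cpp at their tunnels.
		node, err := p2p.NewNode(token)
		if err != nil {
			log.Fatal(err)
		}
		if err := node.Start(ctx); err != nil {
			log.Fatal(err)
		}
		err = p2p.ServiceDiscoverer(ctx, node, token, "", func() {
			var tunnels []string
			for _, v := range p2p.GetAvailableNodes("") {
				if v.IsOnline() {
					tunnels = append(tunnels, v.TunnelAddress)
				}
			}
			os.Setenv("LLAMACPP_GRPC_SERVERS", strings.Join(tunnels, ","))
		})
		if err != nil {
			log.Fatal(err)
		}
	}

	// Keep the process alive until interrupted.
	sig := make(chan os.Signal, 1)
	signal.Notify(sig, os.Interrupt)
	<-sig
}
```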


@@ -32,7 +32,6 @@ type ApplicationConfig struct {
CORSAllowOrigins string
ApiKeys []string
OpaqueErrors bool
P2PToken string
ModelLibraryURL string
@@ -96,12 +95,6 @@ func WithCsrf(b bool) AppOption {
}
}
func WithP2PToken(s string) AppOption {
return func(o *ApplicationConfig) {
o.P2PToken = s
}
}
func WithModelLibraryURL(url string) AppOption {
return func(o *ApplicationConfig) {
o.ModelLibraryURL = url
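
Both the removed WithP2PToken and the surviving WithModelLibraryURL are functional options: an AppOption is a closure that mutates the ApplicationConfig being assembled, so a feature like the P2P token can be added or dropped without changing any constructor signature. A self-contained sketch of the pattern, using only fields visible in this hunk and a hypothetical constructor name:

```go
// Stand-alone sketch of the AppOption functional-options pattern. Field and
// option names mirror the hunk above; NewApplicationConfig is hypothetical.
package main

import "fmt"

type ApplicationConfig struct {
	ModelLibraryURL string
	OpaqueErrors    bool
}

// AppOption is a function that mutates the config under construction.
type AppOption func(*ApplicationConfig)

func WithModelLibraryURL(url string) AppOption {
	return func(o *ApplicationConfig) { o.ModelLibraryURL = url }
}

func WithOpaqueErrors(b bool) AppOption {
	return func(o *ApplicationConfig) { o.OpaqueErrors = b }
}

func NewApplicationConfig(opts ...AppOption) *ApplicationConfig {
	cfg := &ApplicationConfig{}
	for _, opt := range opts {
		opt(cfg)
	}
	return cfg
}

func main() {
	cfg := NewApplicationConfig(
		WithModelLibraryURL("https://example.invalid/library.yaml"),
		WithOpaqueErrors(true),
	)
	fmt.Printf("%+v\n", cfg)
}
```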


@@ -7,7 +7,6 @@ import (
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
@@ -16,14 +15,6 @@ const (
noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
)
func renderElements(n []elem.Node) string {
render := ""
for _, r := range n {
render += r.Render()
}
return render
}
func DoneProgress(galleryID, text string, showDelete bool) string {
var modelName = galleryID
// Split by @ and grab the name
@@ -81,135 +72,6 @@ func ProgressBar(progress string) string {
).Render()
}
func P2PNodeStats(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-6 rounded-lg shadow-lg text-left">
<p class="text-xl font-semibold text-gray-200">Total Workers Detected: {{ len .Nodes }}</p>
{{ $online := 0 }}
{{ range .Nodes }}
{{ if .IsOnline }}
{{ $online = add $online 1 }}
{{ end }}
{{ end }}
<p class="text-xl font-semibold text-gray-200">Total Online Workers: {{$online}}</p>
</div>
*/
online := 0
for _, n := range nodes {
if n.IsOnline() {
online++
}
}
class := "text-green-500"
if online == 0 {
class = "text-red-500"
}
/*
<i class="fas fa-circle animate-pulse text-green-500 ml-2 mr-1"></i>
*/
circle := elem.I(attrs.Props{
"class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1",
})
nodesElements := []elem.Node{
elem.Span(
attrs.Props{
"class": class,
},
circle,
elem.Text(fmt.Sprintf("%d", online)),
),
elem.Span(
attrs.Props{
"class": "text-gray-200",
},
elem.Text(fmt.Sprintf("/%d", len(nodes))),
),
}
return renderElements(nodesElements)
}
func P2PNodeBoxes(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-4 rounded-lg shadow-lg text-left">
<div class="flex items-center mb-2">
<i class="fas fa-desktop text-gray-400 mr-2"></i>
<span class="text-gray-200 font-semibold">{{.ID}}</span>
</div>
<p class="text-sm text-gray-400 mt-2 flex items-center">
Status:
<i class="fas fa-circle {{ if .IsOnline }}text-green-500{{ else }}text-red-500{{ end }} ml-2 mr-1"></i>
<span class="{{ if .IsOnline }}text-green-400{{ else }}text-red-400{{ end }}">
{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
</span>
</p>
</div>
*/
nodesElements := []elem.Node{}
for _, n := range nodes {
nodesElements = append(nodesElements,
elem.Div(
attrs.Props{
"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left",
},
elem.P(
attrs.Props{
"class": "text-sm text-gray-400 mt-2 flex",
},
elem.I(
attrs.Props{
"class": "fas fa-desktop text-gray-400 mr-2",
},
),
elem.Text("Name: "),
elem.Span(
attrs.Props{
"class": "text-gray-200 font-semibold ml-2 mr-1",
},
elem.Text(n.ID),
),
elem.Text("Status: "),
elem.If(
n.IsOnline(),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1",
},
),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1",
},
),
),
elem.If(
n.IsOnline(),
elem.Span(
attrs.Props{
"class": "text-green-400",
},
elem.Text("Online"),
),
elem.Span(
attrs.Props{
"class": "text-red-400",
},
elem.Text("Offline"),
),
),
),
))
}
return renderElements(nodesElements)
}
func StartProgressBar(uid, progress, text string) string {
if progress == "" {
progress = "0"


@@ -4,7 +4,6 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
)
@@ -34,7 +33,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
"Models": models,
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,


@@ -123,10 +123,7 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
return nil, fmt.Errorf("unable to find file id %s", id)
}
// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
// @Success 200 {object} File "Response"
// @Router /v1/files/{file_id} [get]
// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -138,17 +135,13 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
}
}
type DeleteStatus struct {
Id string
Object string
Deleted bool
}
// DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
// @Summary Delete a file.
// @Success 200 {object} DeleteStatus "Response"
// @Router /v1/files/{file_id} [delete]
// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type DeleteStatus struct {
Id string
Object string
Deleted bool
}
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
@@ -181,11 +174,7 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
}
}
// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
// @Summary Returns information about a specific file.
// @Success 200 {string} binary "file"
// @Router /v1/files/{file_id}/content [get]
// GetFilesContentsEndpoint
// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
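
The comment blocks removed in this file are swag-style annotations: they are typically parsed by the swag generator to produce the OpenAPI document that the `github.com/gofiber/swagger` handler serves, so dropping them removes these endpoints from the generated spec rather than from the API itself. A sketch of the annotation style on a trimmed-down handler (handler body and dependencies are placeholders):

```go
// Sketch of the swaggo-style annotations removed in this hunk. The comment
// directives are read by the swag generator, not by the Go compiler, and the
// resulting spec is what the gofiber/swagger handler serves. Handler body and
// dependencies are placeholders.
package openai

import "github.com/gofiber/fiber/v2"

// GetFilesEndpoint is the OpenAI API endpoint to get files
// https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
// @Success 200 {object} File "Response"
// @Router  /v1/files/{file_id} [get]
func GetFilesEndpoint() func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		// Placeholder: the real handler resolves the file from the request
		// and returns its metadata as JSON.
		return c.SendStatus(fiber.StatusNotImplemented)
	}
}
```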


@@ -6,10 +6,6 @@ import (
"github.com/mudler/LocalAI/core/services"
)
// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
// @Summary List and describe the various models available in the API.
// @Success 200 {object} schema.ModelsDataResponse "Response"
// @Router /v1/models [get]
func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
// If blank, no filter is applied.
@@ -22,7 +18,10 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
if err != nil {
return err
}
return c.JSON(schema.ModelsDataResponse{
return c.JSON(struct {
Object string `json:"object"`
Data []schema.OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})


@@ -5,7 +5,6 @@ import (
"github.com/gofiber/swagger"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -57,20 +56,6 @@ func RegisterLocalAIRoutes(app *fiber.App,
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
if p2p.IsP2PEnabled() {
app.Get("/api/p2p", auth, func(c *fiber.Ctx) error {
// Render index
return c.JSON(map[string]interface{}{
"Nodes": p2p.GetAvailableNodes(""),
"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
})
})
app.Get("/api/p2p/token", auth, func(c *fiber.Ctx) error {
return c.Send([]byte(appConfig.P2PToken))
})
}
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`


@@ -10,7 +10,6 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
@@ -54,37 +53,6 @@ func RegisterUIRoutes(app *fiber.App,
app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
if p2p.IsP2PEnabled() {
app.Get("/p2p", auth, func(c *fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI - P2P dashboard",
"Version": internal.PrintableVersion(),
//"Nodes": p2p.GetAvailableNodes(""),
//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"P2PToken": appConfig.P2PToken,
}
// Render index
return c.Render("views/p2p", summary)
})
/* show nodes live! */
app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes("")))
})
app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.FederatedID)))
})
app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes("")))
})
app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.FederatedID)))
})
}
// Show the Models page (all models)
app.Get("/browse", auth, func(c *fiber.Ctx) error {
term := c.Query("term")
@@ -119,9 +87,7 @@ func RegisterUIRoutes(app *fiber.App,
"AllTags": tags,
"ProcessingModels": processingModelsData,
"AvailableModels": len(models),
"IsP2PEnabled": p2p.IsP2PEnabled(),
"TaskTypes": taskTypes,
"TaskTypes": taskTypes,
// "ApplicationConfig": appConfig,
}
@@ -277,7 +243,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -296,7 +261,6 @@ func RegisterUIRoutes(app *fiber.App,
"Title": "LocalAI - Talk",
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].ID,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}
@@ -318,7 +282,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].ID,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -333,7 +296,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -354,7 +316,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -369,7 +330,6 @@ func RegisterUIRoutes(app *fiber.App,
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
"IsP2PEnabled": p2p.IsP2PEnabled(),
}
// Render index
@@ -389,7 +349,6 @@ func RegisterUIRoutes(app *fiber.App,
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"IsP2PEnabled": p2p.IsP2PEnabled(),
"Version": internal.PrintableVersion(),
}


File diff suppressed because one or more lines are too long


@@ -81,10 +81,10 @@ ul {
li {
font-size: 0.875rem; /* Small text size */
color: #4a5568; /* Dark gray text */
/* background-color: #f7fafc; Very light gray background */
background-color: #f7fafc; /* Very light gray background */
border-radius: 0.375rem; /* Rounded corners */
padding: 0.5rem; /* Padding inside each list item */
/*box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); Subtle shadow */
box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); /* Subtle shadow */
margin-bottom: 0.5rem; /* Vertical space between list items */
}


@@ -37,7 +37,7 @@ SOFTWARE.
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
{{template "views/partials/navbar"}}
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg" >
<!-- Chat Header -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">


@@ -1,150 +0,0 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
<div class="container mx-auto px-4 flex-grow">
<div class="workers mt-12 text-center">
<h2 class="text-3xl font-semibold text-gray-100 mb-8">
<i class="fa-solid fa-circle-nodes"></i> Distributed inference with P2P
<a href="https://localai.io/features/distribute/" target="_blank">
<i class="fas fa-circle-info pr-2"></i>
</a>
</h2>
<h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
<!-- Federation Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="/p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p>
<p class="mb-4">You can start LocalAI in federated mode to share your instance, or start the federated server to balance requests between nodes of the federation.</p>
<div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
<div hx-get="/p2p/ui/workers-federation" hx-trigger="every 1s"></div>
</div>
<hr class="border-gray-700 mb-12">
<h3 class="text-2xl font-semibold text-gray-100 mb-6"><i class="fa-solid fa-book"></i> Start a federated instance</h3>
<!-- Tabs navigation -->
<ul class="mb-5 flex list-none flex-row flex-wrap ps-0" role="tablist" data-twe-nav-ref>
<li role="presentation" class="flex-auto text-center">
<a href="#tabs-federated-cli" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700 active" data-twe-toggle="pill" data-twe-target="#tabs-federated-cli" data-twe-nav-active role="tab" aria-controls="tabs-federated-cli" aria-selected="true"><i class="fa-solid fa-terminal"></i> CLI</a>
</li>
<li role="presentation" class="flex-auto text-center">
<a href="#tabs-federated-docker" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700" data-twe-toggle="pill" data-twe-target="#tabs-federated-docker" role="tab" aria-controls="tabs-federated-docker" aria-selected="false"><i class="fa-solid fa-box-open"></i> Container images</a>
</li>
</ul>
<!-- Tabs content -->
<div class="mb-6">
<div class="tabcontent hidden opacity-100 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-federated-cli" role="tabpanel" aria-labelledby="tabs-federated-cli" data-twe-tab-active>
<p class="mb-2">To start a new instance to share:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
# Start a new instance to share with --federated and a TOKEN<br>
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
local-ai run --federated --p2p
</code>
<p class="mt-2">Note: If you don't have a token do not specify it and use the generated one that you can find in this page.</p>
<p class="mb-2">To start a new federated load balancer:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
local-ai federated
</code>
<p class="mt-2">Note: Token is needed when starting the federated server.</p>
<p class="mt-2">For all the options available, please refer to the <a href="https://localai.io/features/distribute/#starting-workers" target="_blank" class="text-yellow-300 hover:text-yellow-400">documentation</a>.</p>
</div>
<div class="tabcontent hidden opacity-0 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-federated-docker" role="tabpanel" aria-labelledby="tabs-federated-docker">
<p class="mb-2">To start a new federated instance:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 8080:8080 localai/localai:latest-cpu run --federated --p2p
</code>
<p class="mb-2">To start a new federated server (port to 9090):</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 9090:8080 localai/localai:latest-cpu federated
</code>
<p class="mt-2">For all the options available and see what image to use, please refer to the <a href="https://localai.io/basics/container/" target="_blank" class="text-yellow-300 hover:text-yellow-400">Container images documentation</a> and <a href="https://localai.io/advanced/#cli-parameters" target="_blank" class="text-yellow-300 hover:text-yellow-400">CLI parameters documentation</a>.</p>
</div>
</div>
</div>
<!-- Llama.cpp Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="/p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p>
<p class="mb-4">You can start llama.cpp workers to distribute weights between the workers and offload part of the computation. To start a new worker, you can use the CLI or Docker.</p>
<div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
<div hx-get="/p2p/ui/workers" hx-trigger="every 1s"></div>
</div>
<hr class="border-gray-700 mb-12">
<h3 class="text-2xl font-semibold text-gray-100 mb-6"><i class="fa-solid fa-book"></i> Start a new llama.cpp P2P worker</h3>
<!-- Tabs navigation -->
<ul class="mb-5 flex list-none flex-row flex-wrap ps-0" role="tablist" data-twe-nav-ref>
<li role="presentation" class="flex-auto text-center">
<a href="#tabs-cli" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700 active" data-twe-toggle="pill" data-twe-target="#tabs-cli" data-twe-nav-active role="tab" aria-controls="tabs-cli" aria-selected="true"><i class="fa-solid fa-terminal"></i> CLI</a>
</li>
<li role="presentation" class="flex-auto text-center">
<a href="#tabs-docker" class="tablink my-2 block border-0 bg-gray-800 px-7 pb-3.5 pt-4 text-xs font-medium uppercase leading-tight text-white hover:bg-gray-700 focus:bg-gray-700 data-[twe-nav-active]:border-yellow-500 data-[twe-nav-active]:text-yellow-500 data-[twe-nav-active]:bg-gray-700" data-twe-toggle="pill" data-twe-target="#tabs-docker" role="tab" aria-controls="tabs-docker" aria-selected="false"><i class="fa-solid fa-box-open"></i> Container images</a>
</li>
</ul>
<!-- Tabs content -->
<div class="mb-6">
<div class="tabcontent hidden opacity-100 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-cli" role="tabpanel" aria-labelledby="tabs-cli" data-twe-tab-active>
<p class="mb-2">To start a new worker, run the following command:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
export TOKEN="<span class="token">{{.P2PToken}}</span>"<br>
local-ai worker p2p-llama-cpp-rpc
</code>
<p class="mt-2">For all the options available, please refer to the <a href="https://localai.io/features/distribute/#starting-workers" target="_blank" class="text-yellow-300 hover:text-yellow-400">documentation</a>.</p>
</div>
<div class="tabcontent hidden opacity-0 transition-opacity duration-150 ease-linear data-[twe-tab-active]:block p-4" id="tabs-docker" role="tabpanel" aria-labelledby="tabs-docker">
<p class="mb-2">To start a new worker with docker, run the following command:</p>
<code class="block bg-gray-700 text-yellow-300 p-4 rounded-lg break-words">
docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --name local-ai -p 8080:8080 localai/localai:latest-cpu worker p2p-llama-cpp-rpc
</code>
<p class="mt-2">For all the options available and see what image to use, please refer to the <a href="https://localai.io/basics/container/" target="_blank" class="text-yellow-300 hover:text-yellow-400">Container images documentation</a> and <a href="https://localai.io/advanced/#cli-parameters" target="_blank" class="text-yellow-300 hover:text-yellow-400">CLI parameters documentation</a>.</p>
</div>
</div>
</div>
<!-- Llama.cpp Box END -->
</div>
</div>
{{template "views/partials/footer" .}}
</div>
<style>
.token {
word-break: break-all;
}
.workers .grid div {
display: flex;
flex-direction: column;
justify-content: space-between;
}
</style>
</body>
</html>

View File

@@ -1,5 +1,4 @@
<footer class="text-center py-8">
LocalAI Version {{.Version}}<br>
<a href='https://localai.io' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
</footer>
<script src="/static/assets/tw-elements.js"></script>
</footer>

View File

@@ -21,9 +21,6 @@
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
{{ if .IsP2PEnabled }}
<a href="/p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
{{ end }}
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
@@ -37,9 +34,6 @@
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
{{ if .IsP2PEnabled }}
<a href="/p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
{{ end }}
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>

View File

@@ -10,7 +10,7 @@
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
{{template "views/partials/navbar"}}
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
<!-- Chat Header -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">

View File

@@ -1,50 +0,0 @@
package p2p
import (
"sync"
"time"
)
const defaultServicesID = "services_localai"
const FederatedID = "federated"
type NodeData struct {
Name string
ID string
TunnelAddress string
LastSeen time.Time
}
func (d NodeData) IsOnline() bool {
now := time.Now()
// if the node was seen in the last 40 seconds, it's online
return now.Sub(d.LastSeen) < 40*time.Second
}
var mu sync.Mutex
var nodes = map[string]map[string]NodeData{}
func GetAvailableNodes(serviceID string) []NodeData {
if serviceID == "" {
serviceID = defaultServicesID
}
mu.Lock()
defer mu.Unlock()
var availableNodes = []NodeData{}
for _, v := range nodes[serviceID] {
availableNodes = append(availableNodes, v)
}
return availableNodes
}
func AddNode(serviceID string, node NodeData) {
if serviceID == "" {
serviceID = defaultServicesID
}
mu.Lock()
defer mu.Unlock()
if nodes[serviceID] == nil {
nodes[serviceID] = map[string]NodeData{}
}
nodes[serviceID][node.ID] = node
}
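
Since the file above is self-contained, a short usage sketch helps show how the pieces fit together. This is a hypothetical helper assumed to live in the same p2p package (the node name, ID, and tunnel address are illustrative only), exercising AddNode, GetAvailableNodes and IsOnline:

package p2p

import (
	"fmt"
	"time"
)

// exampleRegistryUsage is a hypothetical helper (not part of the original
// file) showing how the registry above is meant to be used.
func exampleRegistryUsage() {
	// Register a worker under the default service bucket (empty serviceID).
	AddNode("", NodeData{
		Name:          "worker-1",
		ID:            "host-worker-1",
		TunnelAddress: "127.0.0.1:34567",
		LastSeen:      time.Now(),
	})

	// List the registered workers, keeping only those seen within the
	// 40-second window hard-coded in IsOnline above.
	for _, n := range GetAvailableNodes("") {
		if n.IsOnline() {
			fmt.Printf("online worker %s at %s\n", n.Name, n.TunnelAddress)
		}
	}
}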

View File

@@ -10,18 +10,19 @@ import (
"io"
"net"
"os"
"sync"
"strings"
"time"
"github.com/ipfs/go-log"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/edgevpn/pkg/config"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/services"
"github.com/mudler/edgevpn/pkg/types"
"github.com/phayes/freeport"
"github.com/ipfs/go-log"
"github.com/mudler/edgevpn/pkg/config"
"github.com/mudler/edgevpn/pkg/services"
zlog "github.com/rs/zerolog/log"
"github.com/mudler/edgevpn/pkg/logger"
@@ -33,15 +34,6 @@ func GenerateToken() string {
return newData.Base64()
}
func IsP2PEnabled() bool {
return true
}
func nodeID(s string) string {
hostname, _ := os.Hostname()
return fmt.Sprintf("%s-%s", hostname, s)
}
func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, service string) error {
zlog.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
@@ -61,16 +53,16 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
// }
},
)
@@ -88,6 +80,7 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
continue
}
// ll.Info("New connection from", l.Addr().String())
// Handle connections in a new goroutine, forwarding to the p2p service
go func() {
// Retrieve current ID for ip in the blockchain
@@ -144,30 +137,24 @@ func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
// This is the main server routine (which keeps the env variable updated)
// This starts a goroutine that keeps LLAMACPP_GRPC_SERVERS updated with the discovered services
func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func()) error {
if servicesID == "" {
servicesID = defaultServicesID
}
tunnels, err := discoveryTunnels(ctx, n, token, servicesID)
func LLamaCPPRPCServerDiscoverer(ctx context.Context, token string) error {
tunnels, err := discoveryTunnels(ctx, token)
if err != nil {
return err
}
// TODO: discoveryTunnels should return all the nodes that are available?
// In this way we updated availableNodes here instead of appending
// e.g. we have a LastSeen field in NodeData that is updated in discoveryTunnels
// each time the node is seen
// In this case the below function should be idempotent and just keep track of the nodes
go func() {
totalTunnels := []string{}
for {
select {
case <-ctx.Done():
zlog.Error().Msg("Discoverer stopped")
return
case tunnel := <-tunnels:
AddNode(servicesID, tunnel)
if discoveryFunc != nil {
discoveryFunc()
}
totalTunnels = append(totalTunnels, tunnel)
os.Setenv("LLAMACPP_GRPC_SERVERS", strings.Join(totalTunnels, ","))
zlog.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", strings.Join(totalTunnels, ","))
}
}
}()
@@ -175,10 +162,19 @@ func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID stri
return nil
}
func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID string) (chan NodeData, error) {
tunnels := make(chan NodeData)
func discoveryTunnels(ctx context.Context, token string) (chan string, error) {
tunnels := make(chan string)
err := n.Start(ctx)
nodeOpts, err := newNodeOpts(token)
if err != nil {
return nil, err
}
n, err := node.New(nodeOpts...)
if err != nil {
return nil, fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(ctx)
if err != nil {
return nil, fmt.Errorf("creating a new node: %w", err)
}
@@ -188,14 +184,8 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin
}
// get new services, allocate and return to the channel
// TODO:
// a function ensureServices that:
// - starts a service if not started, if the worker is Online
// - checks that workers are Online, if not cancel the context of allocateLocalService
// - discoveryTunnels should return all the nodes and addresses associated with it
// - the caller should take now care of the fact that we are always returning fresh informations
go func() {
emitted := map[string]bool{}
for {
select {
case <-ctx.Done():
@@ -205,20 +195,20 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin
time.Sleep(5 * time.Second)
zlog.Debug().Msg("Searching for workers")
data := ledger.LastBlock().Storage[servicesID]
for k, v := range data {
data := ledger.LastBlock().Storage["services_localai"]
for k := range data {
zlog.Info().Msgf("Found worker %s", k)
nd := &NodeData{}
if err := v.Unmarshal(nd); err != nil {
zlog.Error().Msg("cannot unmarshal node data")
continue
if _, found := emitted[k]; !found {
emitted[k] = true
//discoveredPeers <- k
port, err := freeport.GetFreePort()
if err != nil {
fmt.Print(err)
}
tunnelAddress := fmt.Sprintf("127.0.0.1:%d", port)
go allocateLocalService(ctx, n, tunnelAddress, k)
tunnels <- tunnelAddress
}
ensureService(ctx, n, nd, k)
muservice.Lock()
if _, ok := service[nd.Name]; ok {
tunnels <- service[nd.Name].NodeData
}
muservice.Unlock()
}
}
}
@@ -227,60 +217,8 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin
return tunnels, err
}
type nodeServiceData struct {
NodeData NodeData
CancelFunc context.CancelFunc
}
var service = map[string]nodeServiceData{}
var muservice sync.Mutex
func ensureService(ctx context.Context, n *node.Node, nd *NodeData, sserv string) {
muservice.Lock()
defer muservice.Unlock()
if ndService, found := service[nd.Name]; !found {
if !nd.IsOnline() {
// if node is offline and not present, do nothing
return
}
newCtxm, cancel := context.WithCancel(ctx)
// Start the service
port, err := freeport.GetFreePort()
if err != nil {
fmt.Print(err)
}
tunnelAddress := fmt.Sprintf("127.0.0.1:%d", port)
nd.TunnelAddress = tunnelAddress
service[nd.Name] = nodeServiceData{
NodeData: *nd,
CancelFunc: cancel,
}
go allocateLocalService(newCtxm, n, tunnelAddress, sserv)
zlog.Debug().Msgf("Starting service %s on %s", sserv, tunnelAddress)
} else {
// Check if the service is still alive
// if not cancel the context
if !nd.IsOnline() && !ndService.NodeData.IsOnline() {
ndService.CancelFunc()
delete(service, nd.Name)
zlog.Info().Msgf("Node %s is offline, deleting", nd.ID)
} else if nd.IsOnline() {
// update last seen inside service
nd.TunnelAddress = ndService.NodeData.TunnelAddress
service[nd.Name] = nodeServiceData{
NodeData: *nd,
CancelFunc: ndService.CancelFunc,
}
zlog.Debug().Msgf("Node %s is still online", nd.ID)
}
}
}
// This is the P2P worker main
func ExposeService(ctx context.Context, host, port, token, servicesID string) error {
if servicesID == "" {
servicesID = defaultServicesID
}
func BindLLamaCPPWorker(ctx context.Context, host, port, token string) error {
llger := logger.New(log.LevelFatal)
nodeOpts, err := newNodeOpts(token)
@@ -310,53 +248,31 @@ func ExposeService(ctx context.Context, host, port, token, servicesID string) er
ledger.Announce(
ctx,
20*time.Second,
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey("services_localai", name)
_, found := ledger.GetKey("services_localai", name)
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[name] = &NodeData{
Name: name,
LastSeen: time.Now(),
ID: nodeID(name),
if !found {
updatedMap := map[string]interface{}{}
updatedMap[name] = "p2p"
ledger.Add("services_localai", updatedMap)
}
ledger.Add(servicesID, updatedMap)
// }
},
)
return err
}
func NewNode(token string) (*node.Node, error) {
nodeOpts, err := newNodeOpts(token)
if err != nil {
return nil, err
}
n, err := node.New(nodeOpts...)
if err != nil {
return nil, fmt.Errorf("creating a new node: %w", err)
}
return n, nil
}
func newNodeOpts(token string) ([]node.Option, error) {
llger := logger.New(log.LevelFatal)
defaultInterval := 10 * time.Second
// TODO: move this up, expose more config options when creating a node
noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"
loglevel := "info"
c := config.Config{
Limit: config.ResourceLimit{
Enable: !noLimits,
Enable: true,
MaxConns: 100,
},
NetworkToken: token,
@@ -376,7 +292,7 @@ func newNodeOpts(token string) ([]node.Option, error) {
RateLimitInterval: defaultInterval,
},
Discovery: config.Discovery{
DHT: noDHT,
DHT: true,
MDNS: true,
Interval: 30 * time.Second,
},
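
Whichever discoverer variant above is used, the end state is the same: each discovered worker becomes reachable through a local tunnel address, and the llama.cpp gRPC backend reads the full set from the LLAMACPP_GRPC_SERVERS environment variable. A minimal sketch of that aggregation step, assuming a channel of tunnel addresses like the ones produced above (the function and variable names here are illustrative, not the project's API):

package p2p

import (
	"context"
	"os"
	"strings"
)

// trackTunnels is a hypothetical reduction of the discovery goroutine:
// it collects tunnel addresses as they are found and keeps the
// LLAMACPP_GRPC_SERVERS environment variable in sync so the llama.cpp
// gRPC backend can reach every worker.
func trackTunnels(ctx context.Context, tunnels <-chan string) {
	seen := map[string]bool{}
	addresses := []string{}
	for {
		select {
		case <-ctx.Done():
			return
		case addr := <-tunnels:
			if seen[addr] {
				continue // already tracked, nothing to update
			}
			seen[addr] = true
			addresses = append(addresses, addr)
			os.Setenv("LLAMACPP_GRPC_SERVERS", strings.Join(addresses, ","))
		}
	}
}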

View File

@@ -6,26 +6,16 @@ package p2p
import (
"context"
"fmt"
"github.com/mudler/edgevpn/pkg/node"
)
func GenerateToken() string {
return "not implemented"
}
func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func()) error {
func LLamaCPPRPCServerDiscoverer(ctx context.Context, token string) error {
return fmt.Errorf("not implemented")
}
func ExposeService(ctx context.Context, host, port, token, servicesID string) error {
func BindLLamaCPPWorker(ctx context.Context, host, port, token string) error {
return fmt.Errorf("not implemented")
}
func IsP2PEnabled() bool {
return false
}
func NewNode(token string) (*node.Node, error) {
return nil, fmt.Errorf("not implemented")
}
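
The file above is the no-op counterpart compiled when the p2p stack is left out of the build, keeping call sites identical either way. A hedged sketch of how such a pair is usually wired with Go build constraints — the tag name "p2p" is an assumption, not confirmed by this diff:

//go:build p2p

package p2p

// IsP2PEnabled reports that the p2p stack was compiled in; the stub file
// shown above is its counterpart for builds without the (assumed) "p2p" tag.
func IsP2PEnabled() bool { return true }

The stub file would then carry the inverse constraint, //go:build !p2p, so exactly one implementation ends up in any given binary.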

View File

@@ -155,8 +155,3 @@ type OpenAIRequest struct {
// AutoGPTQ
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
}
type ModelsDataResponse struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}

View File

@@ -98,14 +98,3 @@ The server logs should indicate that new workers are being discovered.
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
- Only a single model is supported currently.
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
## Environment Variables
Several options and parameters can be tweaked using environment variables:
| Environment Variable | Description |
|----------------------|-------------|
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
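
These toggles map onto plain os.Getenv checks in the Go code shown earlier. A minimal sketch of how a caller might read them before building node options; the struct and function names are hypothetical, while the variable names and the "true" comparisons match the code above:

package p2p

import "os"

// p2pEnvOptions is an illustrative helper mirroring how the documented
// environment variables are consumed before configuring a node.
type p2pEnvOptions struct {
	DisableDHT    bool   // LOCALAI_P2P_DISABLE_DHT: mDNS-only, local discovery
	DisableLimits bool   // LOCALAI_P2P_DISABLE_LIMITS: no connection/resource limits
	Token         string // LOCALAI_P2P_TOKEN: shared network token
}

func readP2PEnv() p2pEnvOptions {
	return p2pEnvOptions{
		DisableDHT:    os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true",
		DisableLimits: os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true",
		Token:         os.Getenv("LOCALAI_P2P_TOKEN"),
	}
}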

View File

@@ -1,3 +1,3 @@
{
"version": "v2.18.1"
"version": "v2.18.0"
}

View File

@@ -76,8 +76,6 @@ DOCKER_INSTALL=${DOCKER_INSTALL:-$docker_found}
USE_AIO=${USE_AIO:-false}
API_KEY=${API_KEY:-}
CORE_IMAGES=${CORE_IMAGES:-false}
P2P_TOKEN=${P2P_TOKEN:-}
WORKER=${WORKER:-false}
# nprocs -1
if available nproc; then
procs=$(nproc)
@@ -134,14 +132,7 @@ configure_systemd() {
info "Adding current user to local-ai group..."
$SUDO usermod -a -G local-ai $(whoami)
STARTCOMMAND="run"
if [ "$WORKER" = true ]; then
if [ -n "$P2P_TOKEN" ]; then
STARTCOMMAND="worker p2p-llama-cpp-rpc"
else
STARTCOMMAND="worker llama-cpp-rpc"
fi
fi
info "Creating local-ai systemd service..."
cat <<EOF | $SUDO tee /etc/systemd/system/local-ai.service >/dev/null
[Unit]
@@ -149,7 +140,7 @@ Description=LocalAI Service
After=network-online.target
[Service]
ExecStart=$BINDIR/local-ai $STARTCOMMAND
ExecStart=$BINDIR/local-ai run
User=local-ai
Group=local-ai
Restart=always
@@ -168,11 +159,6 @@ EOF
$SUDO echo "THREADS=$THREADS" | $SUDO tee -a /etc/localai.env >/dev/null
$SUDO echo "MODELS_PATH=$MODELS_PATH" | $SUDO tee -a /etc/localai.env >/dev/null
if [ -n "$P2P_TOKEN" ]; then
$SUDO echo "LOCALAI_P2P_TOKEN=$P2P_TOKEN" | $SUDO tee -a /etc/localai.env >/dev/null
$SUDO echo "LOCALAI_P2P=true" | $SUDO tee -a /etc/localai.env >/dev/null
fi
SYSTEMCTL_RUNNING="$(systemctl is-system-running || true)"
case $SYSTEMCTL_RUNNING in
running|degraded)
@@ -421,19 +407,6 @@ install_docker() {
# exit 0
fi
STARTCOMMAND="run"
if [ "$WORKER" = true ]; then
if [ -n "$P2P_TOKEN" ]; then
STARTCOMMAND="worker p2p-llama-cpp-rpc"
else
STARTCOMMAND="worker llama-cpp-rpc"
fi
fi
envs=""
if [ -n "$P2P_TOKEN" ]; then
envs="-e LOCALAI_P2P_TOKEN=$P2P_TOKEN -e LOCALAI_P2P=true"
fi
IMAGE_TAG=
if [ "$HAS_CUDA" ]; then
IMAGE_TAG=${VERSION}-cublas-cuda12-ffmpeg
@@ -457,8 +430,7 @@ install_docker() {
--restart=always \
-e API_KEY=$API_KEY \
-e THREADS=$THREADS \
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
elif [ "$HAS_AMD" ]; then
IMAGE_TAG=${VERSION}-hipblas-ffmpeg
# CORE
@@ -476,8 +448,7 @@ install_docker() {
--restart=always \
-e API_KEY=$API_KEY \
-e THREADS=$THREADS \
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
elif [ "$HAS_INTEL" ]; then
IMAGE_TAG=${VERSION}-sycl-f32-ffmpeg
# CORE
@@ -494,8 +465,7 @@ install_docker() {
--restart=always \
-e API_KEY=$API_KEY \
-e THREADS=$THREADS \
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
else
IMAGE_TAG=${VERSION}-ffmpeg
# CORE
@@ -511,8 +481,7 @@ install_docker() {
-e MODELS_PATH=/models \
-e API_KEY=$API_KEY \
-e THREADS=$THREADS \
$envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG
fi
install_success

View File

@@ -16,9 +16,6 @@
- filename: "tw-elements.css"
url: "https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css"
sha: "72746af5326d6eb3647f504efa81b5e0f50ed486f37cc8262a4169781ad310d3"
- filename: "tw-elements.js"
url: "https://cdn.jsdelivr.net/npm/tw-elements/js/tw-elements.umd.min.js"
sha: "2985706362e92360b65c8697cc32490bb9c0a5df9cd9b7251a97c1c5a661a40a"
- filename: "tailwindcss.js"
url: "https://cdn.tailwindcss.com/3.3.0"
sha: "dbff048aa4581e6eae7f1cb2c641f72655ea833b3bb82923c4a59822e11ca594"

View File

@@ -1,83 +0,0 @@
name: Use LocalAI in GHA
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
# Starts the LocalAI container
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
# Ask to explain the diff to LocalAI
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a message summarizing the change diffs"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
# Send the summary somewhere (e.g. Discord)
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "discord-bot"
DISCORD_AVATAR: ""
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
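
The Summarize step above is an OpenAI-style chat-completions call assembled with curl and jq. A hedged Go sketch of the same request against the endpoint and model the workflow uses (the function and struct names are illustrative; error handling is trimmed for brevity):

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

type chatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Model    string        `json:"model"`
	Messages []chatMessage `json:"messages"`
}

// summarizeDiff mirrors the curl/jq call in the workflow: it asks the
// local LocalAI instance to summarize a raw diff.
func summarizeDiff(diff string) (string, error) {
	payload, _ := json.Marshal(chatRequest{
		Model: "hermes-2-theta-llama-3-8b",
		Messages: []chatMessage{
			{Role: "system", Content: "Write a message summarizing the change diffs"},
			{Role: "user", Content: diff},
		},
	})
	resp, err := http.Post("http://localhost:8080/chat/completions",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)

	// Same field the workflow extracts with jq: .choices[0].message.content
	var out struct {
		Choices []struct {
			Message chatMessage `json:"message"`
		} `json:"choices"`
	}
	if err := json.Unmarshal(body, &out); err != nil {
		return "", err
	}
	if len(out.Choices) == 0 {
		return "", fmt.Errorf("no choices in response")
	}
	return out.Choices[0].Message.Content, nil
}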

View File

@@ -127,56 +127,6 @@
- filename: Arcee-Spark-Q4_K_M.gguf
sha256: 44123276d7845dc13f73ca4aa431dc4c931104eb7d2186f2a73d076fa0ee2330
uri: huggingface://arcee-ai/Arcee-Spark-GGUF/Arcee-Spark-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "hercules-5.0-qwen2-7b"
description: |
Locutusque/Hercules-5.0-Qwen2-7B is a fine-tuned language model derived from Qwen2-7B. It is specifically designed to excel in instruction following, function calls, and conversational interactions across various scientific and technical domains. This fine-tuning endows Hercules-5.0 with enhanced abilities in:
Complex Instruction Following: Understanding and accurately executing multi-step instructions, even those involving specialized terminology.
Function Calling: Seamlessly interpreting and executing function calls, providing appropriate input and output values.
Domain-Specific Knowledge: Engaging in informative and educational conversations about Biology, Chemistry, Physics, Mathematics, Medicine, Computer Science, and more.
urls:
- https://huggingface.co/Locutusque/Hercules-5.0-Qwen2-7B
- https://huggingface.co/bartowski/Hercules-5.0-Qwen2-7B-GGUF
overrides:
parameters:
model: Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
files:
- filename: Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
sha256: 8ebae4ffd43b906ddb938c3a611060ee5f99c35014e5ffe23ca35714361b5693
uri: huggingface://Hercules-5.0-Qwen2-7B-Q4_K_M.gguf/Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
name: "arcee-agent"
icon: https://i.ibb.co/CBHmTDn/136719a5-6d8a-4654-a618-46eabc788953.jpg
description: |
Arcee Agent is a cutting-edge 7B parameter language model specifically designed for function calling and tool use. Initialized from Qwen2-7B, it rivals the performance of much larger models while maintaining efficiency and speed. This model is particularly suited for developers, researchers, and businesses looking to implement sophisticated AI-driven solutions without the computational overhead of larger language models. Compute for training Arcee-Agent was provided by CrusoeAI. Arcee-Agent was trained using Spectrum.
urls:
- https://huggingface.co/crusoeai/Arcee-Agent-GGUF
- https://huggingface.co/arcee-ai/Arcee-Agent
overrides:
parameters:
model: arcee-agent.Q4_K_M.gguf
files:
- filename: arcee-agent.Q4_K_M.gguf
sha256: ebb49943a66c1e717f9399a555aee0af28a40bfac7500f2ad8dd05f211b62aac
uri: huggingface://crusoeai/Arcee-Agent-GGUF/arcee-agent.Q4_K_M.gguf
- !!merge <<: *qwen2
name: "qwen2-7b-instruct-v0.8"
icon: https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp
description: |
MaziyarPanahi/Qwen2-7B-Instruct-v0.8
This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks.
urls:
- https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8
- https://huggingface.co/MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF
overrides:
parameters:
model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
files:
- filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e
uri: huggingface://MaziyarPanahi/Qwen2-7B-Instruct-v0.8-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf
- &mistral03
## START Mistral
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -375,7 +325,7 @@
files:
- filename: gemma-2-27b-it-Q4_K_M.gguf
uri: huggingface://bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_M.gguf
sha256: 69a0cdba2bc2e56d8298a9330f2a050ebecd657ed315beb3e51ae427b224dbc7
sha256: ca86fbdb791842cf2e5eb276a6916e326b3b5d58d9ab60ee3e18b1c6f01fc181
- !!merge <<: *gemma
name: "gemma-2-9b-it"
urls:
@@ -389,100 +339,7 @@
files:
- filename: gemma-2-9b-it-Q4_K_M.gguf
uri: huggingface://bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf
sha256: 05390244866abc0e7108a2b1e3db07b82df3cd82f006256a75fc21137054151f
- !!merge <<: *gemma
name: "tess-v2.5-gemma-2-27b-alpha"
urls:
- https://huggingface.co/migtissera/Tess-v2.5-Gemma-2-27B-alpha
- https://huggingface.co/bartowski/Tess-v2.5-Gemma-2-27B-alpha-GGUF
icon: https://huggingface.co/migtissera/Tess-v2.5-Qwen2-72B/resolve/main/Tess-v2.5.png
description: |
Great at reasoning, but woke as fuck! This is a fine-tune over the Gemma-2-27B-it, since the base model fine-tuning is not generating coherent content.
Tess-v2.5 is the latest state-of-the-art model in the Tess series of Large Language Models (LLMs). Tess, short for Tesoro (Treasure in Italian), is the flagship LLM series created by Migel Tissera. Tess-v2.5 brings significant improvements in reasoning capabilities, coding capabilities and mathematics
overrides:
parameters:
model: Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
files:
- filename: Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
uri: huggingface://bartowski/Tess-v2.5-Gemma-2-27B-alpha-GGUF/Tess-v2.5-Gemma-2-27B-alpha-Q4_K_M.gguf
sha256: d7be7092d28aefbdcd1ee4f4d8503d169d0a649f763e169d4b179aef20d69c21
- !!merge <<: *gemma
name: "gemma2-9b-daybreak-v0.5"
urls:
- https://huggingface.co/crestf411/gemma2-9B-daybreak-v0.5
- https://huggingface.co/Vdr1/gemma2-9B-daybreak-v0.5-GGUF-Imatrix-IQ
description: |
THIS IS A PRE-RELEASE. BEGONE.
Beware, depraved. Not suitable for any audience.
Dataset curation to remove slop-perceived expressions continues. Unfortunately base models (which this is merged on top of) are generally riddled with "barely audible"s and "couldn't help"s and "shivers down spines" etc.
overrides:
parameters:
model: gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
files:
- filename: gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
uri: huggingface://Vdr1/gemma2-9B-daybreak-v0.5-GGUF-Imatrix-IQ/gemma2-9B-daybreak-v0.5-Q4_K_M-imat.gguf
sha256: 6add4d12052918986af935d686773e4e89fddd1bbf7941911cf3fbeb1b1862c0
- !!merge <<: *gemma
name: "gemma-2-9b-it-sppo-iter3"
urls:
- https://huggingface.co/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3
- https://huggingface.co/bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF
description: |
Self-Play Preference Optimization for Language Model Alignment (https://arxiv.org/abs/2405.00675)
Gemma-2-9B-It-SPPO-Iter3
This model was developed using Self-Play Preference Optimization at iteration 3, based on the google/gemma-2-9b-it architecture as the starting point. We utilized the prompt sets from the openbmb/UltraFeedback dataset, split into 3 parts for the 3 iterations by snorkelai/Snorkel-Mistral-PairRM-DPO-Dataset. All responses used are synthetic.
overrides:
parameters:
model: Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
files:
- filename: Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
uri: huggingface://bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF/Gemma-2-9B-It-SPPO-Iter3-Q4_K_M.gguf
sha256: 7aac221f548beef8d45106eabbec6b2c4e1669a51ad14e4bf640d463dadf36e7
- !!merge <<: *gemma
name: "smegmma-9b-v1"
icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/RSuc5p9Sm6CYj6lGOxvx4.gif
urls:
- https://huggingface.co/TheDrummer/Smegmma-9B-v1
- https://huggingface.co/bartowski/Smegmma-9B-v1-GGUF
description: |
Smegmma 9B v1 🧀
The sweet moist of Gemma 2, unhinged.
image/gif
smeg - ghem - mah
An eRP model that will blast you with creamy moist. Finetuned by yours truly.
The first Gemma 2 9B RP finetune attempt!
What's New?
Engaging roleplay
Less refusals / censorship
Less commentaries / summaries
More willing AI
Better formatting
Better creativity
Moist alignment
Notes
Refusals still exist, but a couple of re-gens may yield the result you want
Formatting and logic may be weaker at the start
Make sure to start strong
May be weaker with certain cards, YMMV and adjust accordingly!
overrides:
parameters:
model: Smegmma-9B-v1-Q4_K_M.gguf
files:
- filename: Smegmma-9B-v1-Q4_K_M.gguf
uri: huggingface://bartowski/Smegmma-9B-v1-GGUF/Smegmma-9B-v1-Q4_K_M.gguf
sha256: abd9da0a6bf5cbc0ed6bb0d7e3ee7aea3f6b1edbf8c64e51d0fa25001975aed7
sha256: c70fd20caec79fb953b83031c46ddea4e99905835a66af7b8a856aa1b2534614
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -618,8 +475,8 @@
model: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
files:
- filename: Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
sha256: 5833d99d5596cade0d02e61cddaa6dac49170864ee56d0b602933c6f9fbae314
uri: huggingface://bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF/Llama-3-SauerkrautLM-8b-Instruct-Q4_K_M.gguf
sha256: e5ae69b6f59b3f207fa6b435490286b365add846a310c46924fa784b5a7d73e3
- !!merge <<: *llama3
name: "llama-3-13b-instruct-v0.1"
urls:
@@ -1473,31 +1330,6 @@
- filename: llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
sha256: 86f0f1e10fc315689e09314aebb7354bb40d8fe95de008d21a75dc8fff1cd2fe
uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.1-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.1-Uncensored-1048K-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "llama3-8b-darkidol-2.2-uncensored-1048k-iq-imatrix"
urls:
- https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K
- https://huggingface.co/LWDCLS/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF-IQ-Imatrix-Request
icon: https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K/resolve/main/llama3-8B-DarkIdol-2.2-Uncensored-1048K.png
description: |
The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
- Saving money(LLama 3)
- Uncensored
- Quick response
- The underlying model used is winglian/Llama-3-8b-1048k-PoSE
- A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :)
- DarkIdol:Roles that you can imagine and those that you cannot imagine.
- Roleplay
- Specialized in various role-playing scenarios; for more, see the test roles (https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/tree/main/test)
- For more, see the LM Studio presets (https://huggingface.co/aifeifei798/llama3-8B-DarkIdol-1.2/tree/main/config-presets)
overrides:
parameters:
model: llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
files:
- filename: llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
sha256: 7714947799d4e6984cf9106244ee24aa821778936ad1a81023480a774e255f52
uri: huggingface://LWDCLS/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF-IQ-Imatrix-Request/llama3-8B-DarkIdol-2.2-Uncensored-1048K-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "llama3-turbcat-instruct-8b"
urls:
@@ -1513,76 +1345,6 @@
- filename: llama3-turbcat-instruct-8b-Q4_K_M.gguf
sha256: a9a36e3220d901a8ad80c75608a81aaeed3a9cdf111247462bf5e3443aad5461
uri: huggingface://bartowski/llama3-turbcat-instruct-8b-GGUF/llama3-turbcat-instruct-8b-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-8b-everything-cot"
urls:
- https://huggingface.co/FPHam/L3-8B-Everything-COT
- https://huggingface.co/bartowski/L3-8B-Everything-COT-GGUF
icon: https://huggingface.co/FPHam/L3-8B-Everything-COT/resolve/main/cot2.png
description: |
Everything COT is an investigative self-reflecting general model that uses Chain of Thought for everything. And I mean everything.
Instead of confidently proclaiming something (or confidently hallucinating other things) like most models, it carries an internal dialogue with itself and often casts doubt over uncertain topics while looking at them from various sides.
overrides:
parameters:
model: L3-8B-Everything-COT-Q4_K_M.gguf
files:
- filename: L3-8B-Everything-COT-Q4_K_M.gguf
sha256: b220b0e2f8fb1c8a491d10dbd054269ed078ee5e2e62dc9d2e3b97b06f52e987
uri: huggingface://bartowski/L3-8B-Everything-COT-GGUF/L3-8B-Everything-COT-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-llamilitary"
urls:
- https://huggingface.co/Heralax/llama-3-llamilitary
- https://huggingface.co/mudler/llama-3-llamilitary-Q4_K_M-GGUF
icon: https://cdn-uploads.huggingface.co/production/uploads/64825ebceb4befee377cf8ac/ea2C9laq24V6OuxwhzJZS.png
description: |
This is a model trained on [instruct data generated from old historical war books] as well as on the books themselves, with the goal of creating a joke LLM knowledgeable about the (long gone) kind of warfare involving muskets, cavalry, and cannon.
This model can provide good answers, but it turned out to be pretty fragile during conversation for some reason: open-ended questions can make it spout nonsense. Asking facts is more reliable but not guaranteed to work.
The basic guide to getting good answers is: be specific with your questions. Use specific terms and define a concrete scenario, if you can, otherwise the LLM will often hallucinate the rest. I think the issue was that I did not train with a large enough system prompt: not enough latent space is being activated by default. (I'll try to correct this in future runs).
overrides:
parameters:
model: llama-3-llamilitary-q4_k_m.gguf
files:
- filename: llama-3-llamilitary-q4_k_m.gguf
sha256: f3684f2f0845f9aead884fa9a52ea67bed53856ebeedef1620ca863aba57e458
uri: huggingface://mudler/llama-3-llamilitary-Q4_K_M-GGUF/llama-3-llamilitary-q4_k_m.gguf
- !!merge <<: *llama3
name: "l3-stheno-maid-blackroot-grand-horror-16b"
urls:
- https://huggingface.co/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF
icon: https://huggingface.co/DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/resolve/main/hm.jpg
description: |
Rebuilt and Powered Up.
WARNING: NSFW. Graphic HORROR. Extreme swearing. UNCENSORED. SMART.
The author took the original models in "L3-Stheno-Maid-Blackroot 8B" and completely rebuilt it a new pass-through merge (everything preserved) and blew it out to over 16.5 billion parameters - 642 tensors, 71 layers (8B original has 32 layers).
This is not an "upscale" or "franken merge" but a completely new model based on the models used to construct "L3-Stheno-Maid-Blackroot 8B".
The result is a take no prisoners, totally uncensored, fiction writing monster and roleplay master as well just about... any general fiction activity "AI guru" including scene generation and scene continuation.
As a result of the expansion / merge re-build its level of prose and story generation has significantly improved as well as word choice, sentence structure as well as default output levels and lengths.
It also has a STRONG horror bias, although it will generate content for almost any genre. That being said if there is a "hint" of things going wrong... they will.
It will also swear (R-18) like there is no tomorrow at times and "dark" characters will be VERY dark so to speak.
The model excels in details (real and "constructed"), descriptions, similes and metaphors.
It can have a sense of humor ... ah... dark humor.
Because of the nature of this merge most attributes of each of the 3 models will be in this rebuilt 16.5B model as opposed to the original 8B model where some of one or more of the model's features and/or strengths maybe reduced or overshadowed.
overrides:
parameters:
model: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
files:
- filename: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
sha256: ae29f38d73dfb04415821405cf8b319fc42d78d0cdd0da91db147d12e68030fe
uri: huggingface://DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
- &dolphin
name: "dolphin-2.9-llama3-8b"
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
@@ -2379,176 +2141,6 @@
- filename: Llama-3-Update-3.0-mmproj-model-f16.gguf
sha256: 3d2f36dff61d6157cadf102df86a808eb9f8a230be1bc0bc99039d81a895468a
uri: huggingface://Nitral-AI/Llama-3-Update-3.0-mmproj-model-f16/Llama-3-Update-3.0-mmproj-model-f16.gguf
- !!merge <<: *llama3
name: "llama-3_8b_unaligned_alpha"
urls:
- https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha
- https://huggingface.co/bartowski/LLAMA-3_8B_Unaligned_Alpha-GGUF
description: |
Model card description:
As of June 11, 2024, I've finally started training the model! The training is progressing smoothly, although it will take some time. I used a combination of model merges and an abliterated model as base, followed by a comprehensive deep unalignment protocol to unalign the model to its core. A common issue with uncensoring and unaligning models is that it often significantly impacts their base intelligence. To mitigate these drawbacks, I've included a substantial corpus of common sense, theory of mind, and various other elements to counteract the effects of the deep uncensoring process. Given the extensive corpus involved, the training will require at least a week of continuous training. Expected early results: in about 3-4 days.
Additional info:
As of June 13, 2024, I've observed that even after two days of continuous training, the model is still resistant to learning certain aspects.
For example, some of the validation data still shows a loss over , whereas other parts have a loss of < or lower. This is after the model was initially abliterated.
June 18, 2024 Update, After extensive testing of the intermediate checkpoints, significant progress has been made.
The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes, this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days.. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work.
June 20, 2024 Update, Unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet, and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model.
icon: https://i.imgur.com/Kpk1PgZ.png
overrides:
parameters:
model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
files:
- filename: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
sha256: 93ddb5f9f525586d2578186c61e39f96461c26c0b38631de89aa30b171774515
uri: huggingface://bartowski/LLAMA-3_8B_Unaligned_Alpha-GGUF/LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-8b-lunaris-v1"
urls:
- https://huggingface.co/Sao10K/L3-8B-Lunaris-v1
- https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF
description: |
A generalist / roleplaying model merge based on Llama 3. Models are selected from my personal experience while using them.
I personally think this is an improvement over Stheno v3.2, considering the other models helped balance out its creativity and at the same time improving its logic.
overrides:
parameters:
model: L3-8B-Lunaris-v1-Q4_K_M.gguf
files:
- filename: L3-8B-Lunaris-v1-Q4_K_M.gguf
sha256: ef1d393f125be8c608859eeb4f26185ad90c7fc9cba41c96e847e77cdbcada18
uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3_8b_unaligned_alpha_rp_soup-i1"
icon: https://i.imgur.com/pXcjpoV.png
urls:
- https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup
- https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF
description: |
Censorship level: Medium
This model is the outcome of multiple merges, starting with the base model SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha. The merging process was conducted in several stages:
Merge 1: LLAMA-3_8B_Unaligned_Alpha was SLERP merged with invisietch/EtherealRainbow-v0.3-8B.
Merge 2: LLAMA-3_8B_Unaligned_Alpha was SLERP merged with TheDrummer/Llama-3SOME-8B-v2.
Soup 1: Merge 1 was combined with Merge 2.
Final Merge: Soup 1 was SLERP merged with Nitral-Archive/Hathor_Enigmatica-L3-8B-v0.4.
The final model is surprisingly coherent (although slightly more censored), which is a bit unexpected, since all the intermediate merge steps were pretty incoherent.
overrides:
parameters:
model: LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
files:
- filename: LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
sha256: 94347eb5125d9092e286730ae0ccc78374d68663c16ad2265005d8721eb8807b
uri: huggingface://mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF/LLAMA-3_8B_Unaligned_Alpha_RP_Soup.i1-Q4_K_M.gguf
- !!merge <<: *llama3
name: "hathor_respawn-l3-8b-v0.8"
icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/sWyipsXI-Wl-uEm57SRwM.png
urls:
- https://huggingface.co/Nitral-AI/Hathor_Respawn-L3-8B-v0.8
- https://huggingface.co/bartowski/Hathor_Respawn-L3-8B-v0.8-GGUF
description: |
Hathor_Aleph-v0.8 is a model based on the LLaMA 3 architecture: Designed to seamlessly integrate the qualities of creativity, intelligence, and robust performance. Making it an ideal tool for a wide range of applications; such as creative writing, educational support and human/computer interaction.
Hathor 0.8 is trained on 3 epochs of private RP, STEM (instruction/dialogs), Opus instructions, a mixture of light/classical novel data, and roleplaying chat pairs over Llama 3 8B Instruct.
overrides:
parameters:
model: Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
files:
- filename: Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
sha256: d0cdfa8951ee80b252bf1dc183403ca9b48bc3de1578cb8e7fe321af753e661c
uri: huggingface://bartowski/Hathor_Respawn-L3-8B-v0.8-GGUF/Hathor_Respawn-L3-8B-v0.8-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama3-8b-instruct-replete-adapted"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/-0dERC793D9XeFsJ9uHbx.png
urls:
- https://huggingface.co/Replete-AI/Llama3-8B-Instruct-Replete-Adapted
- https://huggingface.co/bartowski/Llama3-8B-Instruct-Replete-Adapted-GGUF
description: |
Replete-Coder-llama3-8b is a general purpose model that is specially trained in coding in over 100 coding languages. The data used to train the model contains 25% non-code instruction data and 75% coding instruction data totaling up to 3.9 million lines, roughly 1 billion tokens, or 7.27gb of instruct data. The data used to train this model was 100% uncensored, then fully deduplicated, before training happened.
More than just a coding model!
Although Replete-Coder has amazing coding capabilities, it's trained on a vast amount of non-coding data, fully cleaned and uncensored. Don't just use it for coding, use it for all your needs! We are truly trying to make the GPT killer!
overrides:
parameters:
model: Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
files:
- filename: Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
sha256: 9e9a142f6fb5fc812b17bfc30230582ae50ac22b93dea696b6887cde815c1cb4
uri: huggingface://bartowski/Llama3-8B-Instruct-Replete-Adapted-GGUF/Llama3-8B-Instruct-Replete-Adapted-Q4_K_M.gguf
- !!merge <<: *llama3
name: "llama-3-perky-pat-instruct-8b"
urls:
- https://huggingface.co/grimjim/Llama-3-Perky-Pat-Instruct-8B
- https://huggingface.co/bartowski/Llama-3-Perky-Pat-Instruct-8B-GGUF
description: |
We explore negative weight merging and propose Orthogonalized Vector Adaptation, or OVA.
This is a merge of pre-trained language models created using mergekit.
"One must imagine Sisyphys happy."
Task arithmetic was used to invert the intervention vector that was applied in MopeyMule, via application of negative weight -1.0. The combination of model weights (Instruct - MopeyMule) comprises an Orthogonalized Vector Adaptation that can subsequently be applied to the base Instruct model, and could in principle be applied to other models derived from fine-tuning the Instruct model.
This model is meant to continue exploration of behavioral changes that can be achieved via orthogonalized steering. The result appears to be more enthusiastic and lengthy responses in chat, though it is also clear that the merged model has some unhealed damage.
Built with Meta Llama 3.
overrides:
parameters:
model: Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
files:
- filename: Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
sha256: b0eae5d9d58a7101a30693c267097a90f4a005c81fda801b40ab2c25e788a93e
uri: huggingface://bartowski/Llama-3-Perky-Pat-Instruct-8B-GGUF/Llama-3-Perky-Pat-Instruct-8B-Q4_K_M.gguf
- !!merge <<: *llama3
name: "l3-uncen-merger-omelette-rp-v0.2-8b"
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/m0YKWwK9n7w8rnKOzduu4.png
urls:
- https://huggingface.co/Casual-Autopsy/L3-Uncen-Merger-Omelette-RP-v0.2-8B
- https://huggingface.co/LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request
description: |
L3-Uncen-Merger-Omelette-RP-v0.2-8B is a merge of the following models using LazyMergekit:
Sao10K/L3-8B-Stheno-v3.2
Casual-Autopsy/L3-Umbral-Mind-RP-v1.0-8B
bluuwhale/L3-SthenoMaidBlackroot-8B-V1
Cas-Warehouse/Llama-3-Mopeyfied-Psychology-v2
migtissera/Llama-3-8B-Synthia-v3.5
tannedbum/L3-Nymeria-Maid-8B
Casual-Autopsy/L3-Umbral-Mind-RP-v0.3-8B
tannedbum/L3-Nymeria-8B
ChaoticNeutrals/Hathor_RP-v.01-L3-8B
cgato/L3-TheSpice-8b-v0.8.3
Sao10K/L3-8B-Stheno-v3.1
Nitral-AI/Hathor_Stable-v0.2-L3-8B
aifeifei798/llama3-8B-DarkIdol-1.0
ChaoticNeutrals/Poppy_Porpoise-1.4-L3-8B
ResplendentAI/Nymph_8B
overrides:
parameters:
model: L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
files:
- filename: L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
sha256: 6bbc42a4c3b25f2b854d76a6e32746b9b3b21dd8856f8f2bc1a5b1269aa8fca1
uri: huggingface://LWDCLS/L3-Uncen-Merger-Omelette-RP-v0.2-8B-GGUF-IQ-Imatrix-Request/L3-Uncen-Merger-Omelette-RP-v0.2-8B-Q4_K_M-imat.gguf
- !!merge <<: *llama3
name: "nymph_8b-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/9U_eJCDzLJ8nxb6qfuICc.jpeg
urls:
- https://huggingface.co/ResplendentAI/Nymph_8B
- https://huggingface.co/mradermacher/Nymph_8B-i1-GGUF?not-for-all-audiences=true
description: |
Model card:
Nymph is the culmination of everything I have learned with the T-series project. This model aims to be a unique and full featured RP juggernaut.
The finetune incorporates 1.6 Million tokens of RP data sourced from Bluemoon, FreedomRP, Aesir-Preview, and Claude Opus logs. I made sure to use the multi-turn sharegpt datasets this time instead of alpaca conversions. I have also included three of my personal datasets. The final touch is an ORPO based upon Openhermes Roleplay preferences.
overrides:
parameters:
model: Nymph_8B.i1-Q4_K_M.gguf
files:
- filename: Nymph_8B.i1-Q4_K_M.gguf
sha256: 5b35794539d9cd262720f47a54f59dbffd5bf6c601950359b5c68d13f1ce13a0
uri: huggingface://mradermacher/Nymph_8B-i1-GGUF/Nymph_8B.i1-Q4_K_M.gguf
- &chatml
### ChatML
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -2745,33 +2337,6 @@
- filename: "phi-2-orange.Q4_0.gguf"
sha256: "49cb710ae688e1b19b1b299087fa40765a0cd677e3afcc45e5f7ef6750975dcf"
uri: "huggingface://TheBloke/phi-2-orange-GGUF/phi-2-orange.Q4_0.gguf"
### Internlm2
- name: "internlm2_5-7b-chat-1m"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
urls:
- https://huggingface.co/internlm/internlm2_5-7b-chat-1m
- https://huggingface.co/bartowski/internlm2_5-7b-chat-1m-GGUF
icon: https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e
tags:
- internlm2
- gguf
- cpu
- gpu
description: |
InternLM2.5 has open-sourced a 7 billion parameter base model and a chat model tailored for practical scenarios. The model has the following characteristics:
Outstanding reasoning capability: State-of-the-art performance on Math reasoning, surpassing models like Llama3 and Gemma2-9B.
1M Context window: Nearly perfect at finding needles in the haystack with 1M-long context, with leading performance on long-context tasks like LongBench. Try it with LMDeploy for 1M-context inference and a file chat demo.
Stronger tool use: InternLM2.5 supports gathering information from more than 100 web pages, corresponding implementation will be released in Lagent soon. InternLM2.5 has better tool utilization-related capabilities in instruction following, tool selection and reflection. See examples.
overrides:
parameters:
model: internlm2_5-7b-chat-1m-Q4_K_M.gguf
files:
- filename: internlm2_5-7b-chat-1m-Q4_K_M.gguf
uri: huggingface://bartowski/internlm2_5-7b-chat-1m-GGUF/internlm2_5-7b-chat-1m-Q4_K_M.gguf
sha256: 10d5e18a4125f9d4d74a9284a21e0c820b150af06dee48665e54ff6e1be3a564
- &phi-3
### START Phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"

View File

@@ -247,23 +247,14 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
}
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
p := backendPath(assetDir, LLamaCPPAVX2)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
grpcProcess = p
}
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
p := backendPath(assetDir, LLamaCPPAVX)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
grpcProcess = p
}
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPAVX)
} else {
p := backendPath(assetDir, LLamaCPPFallback)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
grpcProcess = p
}
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPFallback)
}
return grpcProcess
@@ -518,39 +509,6 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
err = errors.Join(err, fmt.Errorf("backend %s returned no usable model", key))
log.Info().Msgf("[%s] Fails: %s", key, "backend returned no usable model")
}
if autoDetect && key == LLamaCPP && err != nil {
// try as hard as possible to run the llama.cpp variants
backendToUse := ""
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
backendToUse = LLamaCPPAVX2
}
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
backendToUse = LLamaCPPAVX
}
} else {
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
backendToUse = LLamaCPPFallback
} else {
// If we don't have a fallback, just skip fallback
continue
}
}
// Autodetection failed, try the fallback
log.Info().Msgf("[%s] Autodetection failed, trying the fallback", key)
options = append(options, WithBackendString(backendToUse))
model, modelerr = ml.BackendLoader(options...)
if modelerr == nil && model != nil {
log.Info().Msgf("[%s] Loads OK", key)
return model, nil
} else {
err = errors.Join(err, fmt.Errorf("[%s]: %w", key, modelerr))
log.Info().Msgf("[%s] Fails: %s", key, modelerr.Error())
}
}
}
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())

View File

@@ -1,23 +0,0 @@
#!/bin/bash
mkdir -p backend-assets/lib
OS="$(uname)"
if [ "$OS" == "Darwin" ]; then
LIBS="$(otool -L $1 | awk 'NR > 1 { system("echo " $1) } ' | xargs echo)"
elif [ "$OS" == "Linux" ]; then
LIBS="$(ldd $1 | awk 'NF == 4 { system("echo " $3) } ' | xargs echo)"
else
echo "Unsupported OS"
exit 1
fi
for lib in $LIBS; do
cp -f $lib backend-assets/lib
done
echo "==============================="
echo "Copied libraries to backend-assets/lib"
echo "$LIBS"
echo "==============================="