Merge branch 'master' into fix_aarch64

fix(cuda): downgrade default version from 12.5 to 12.4 (#2707 )
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-24 16:51:44 -04:00 · 2024-07-09 23:15:00 +02:00 · 2024-07-09 23:13:29 +02:00 · 2024-07-09 23:12:19 +02:00 · 2024-07-09 23:11:51 +02:00 · 2024-07-09 23:10:02 +02:00
200 changed files with 4544 additions and 1412 deletions
--- a/.github/ci/modelslist.go
+++ b/.github/ci/modelslist.go
@@ -75,7 +75,7 @@ var modelPageTemplate string = `
    <div class="container mx-auto px-4 py-4">
        <div class="flex items-center justify-between">
            <div class="flex items-center">
-                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
+                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
                <a href="/" class="text-white text-xl font-bold">LocalAI</a>
            </div>
            <!-- Menu button for small screens -->
@@ -114,12 +114,12 @@ var modelPageTemplate string = `
 	<h2 class="text-center text-3xl font-semibold text-gray-100">
-	 🖼️ Available {{.AvailableModels}} models</i> repositories     <a href="https://localai.io/models/" target="_blank" >
+	 🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
 			<i class="fas fa-circle-info pr-2"></i>
 		</a></h2>
 	<h3>
-	Refer to <a href="https://localai.io/models" target=_blank> Model gallery</a> for more information on how to use the models with LocalAI.
+	Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
 	You can install models with the CLI command <code>local-ai models install &lt;model-name&gt;</code> or by using the WebUI.
 	</h3>
--- a/.github/release.yml
+++ b/.github/release.yml
@@ -13,6 +13,9 @@ changelog:
      labels:
        - bug
        - regression
    - title: "🖧 P2P area"
      labels:
         - area/p2p
    - title: Exciting New Features 🎉
      labels:
        - Semver-Minor
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -32,7 +32,7 @@ jobs:
    strategy:
      # Pushing with all jobs in parallel
      # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
+      max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
      matrix:
        include:
          - build-type: ''
@@ -46,7 +46,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "4"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "4"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg-core'
@@ -128,3 +128,12 @@ jobs:
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-vulkan-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -39,7 +39,7 @@ jobs:
    strategy:
      # Pushing with all jobs in parallel
      # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
+      max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
      matrix:
        include:
          # Extra images
@@ -75,7 +75,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "4"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "4"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -257,6 +257,7 @@ jobs:
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
      matrix:
        include:
          - build-type: ''
@@ -266,7 +267,7 @@ jobs:
            ffmpeg: 'true'
            image-type: 'core'
            base-image: "ubuntu:22.04"
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            aio: "-aio-cpu"
            latest-image: 'latest-cpu'
            latest-image-aio: 'latest-aio-cpu'
@@ -280,18 +281,18 @@ jobs:
            ffmpeg: ''
            image-type: 'core'
            base-image: "ubuntu:22.04"
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "4"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-core'
            ffmpeg: ''
            image-type: 'core'
            base-image: "ubuntu:22.04"
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "11"
@@ -301,17 +302,27 @@ jobs:
            tag-suffix: '-cublas-cuda11-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "1"
+            cuda-minor-version: "4"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
-            runs-on: 'ubuntu-latest'
+            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-vulkan-ffmpeg-core'
            latest-image: 'latest-vulkan-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -19,11 +19,11 @@ on:
        type: string
      cuda-major-version:
        description: 'CUDA major version'
-        default: "11"
+        default: "12"
        type: string
      cuda-minor-version:
        description: 'CUDA minor version'
-        default: "7"
+        default: "4"
        type: string
      platforms:
        description: 'Platforms'
--- a/.github/workflows/notify-models.yaml
+++ b/.github/workflows/notify-models.yaml
@@ -0,0 +1,170 @@
 name: Notifications for new models
 on:
  pull_request:
     types:
       - closed
 jobs:
  # Announces newly merged gallery models on Discord: boots a LocalAI container,
  # asks it to summarise the PR diff and posts the summary through a webhook.
  notify-discord:
    # Run only for PRs that were actually merged and carry the 'area/ai-model' label.
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
    env:
        MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0 # needed to checkout all branches for this Action to work
    - name: Start LocalAI
      run: |
        echo "Starting LocalAI..."
        # Detached run. The former "-e -ti" made docker consume "-ti" as the value
        # of -e (an env var name), so it never allocated a TTY and is dropped here.
        docker run -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug "$MODEL_NAME"
        until [ "$(docker inspect -f '{{.State.Health.Status}}' local-ai)" == "healthy" ]; do
          # Bail out if the container died; otherwise this loop would spin until the job times out.
          if [ "$(docker inspect -f '{{.State.Running}}' local-ai)" != "true" ]; then
            echo "LocalAI container exited before becoming healthy"
            docker logs --tail 50 local-ai
            exit 1
          fi
          echo "Waiting for container to be ready"
          docker logs --tail 10 local-ai
          sleep 2
        done
      # Check the PR diff using the current branch and the base branch of the PR
    - uses: GrantBirki/git-diff-action@v2.7.0
      id: git-diff-action
      with:
            json_diff_file_output: diff.json
            raw_diff_file_output: diff.txt
            file_output_only: "true"
    - name: Summarize
      env:
        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
      id: summarize
      run: |
            input="$(cat "$DIFF")"
            # Define the LocalAI API endpoint
            API_URL="http://localhost:8080/chat/completions"
            # Create a JSON payload using jq to handle special characters;
            # both the model name and the diff are passed as jq variables so
            # neither can break out of the JSON document.
            json_payload=$(jq -n --arg model "$MODEL_NAME" --arg input "$input" '{
            model: $model,
            messages: [
                {
                role: "system",
                content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
                },
                {
                role: "user",
                content: $input
                }
            ]
            }')
            # Send the request to LocalAI
            response=$(curl -s -X POST "$API_URL" \
            -H "Content-Type: application/json" \
            -d "$json_payload")
            # Extract the summary from the response. The response is quoted so the
            # shell does not word-split or glob-expand the JSON before jq sees it.
            summary="$(printf '%s' "$response" | jq -r '.choices[0].message.content')"
            # Print the summary
            #  -H "Authorization: Bearer $API_KEY" \
            echo "Summary:"
            echo "$summary"
            echo "payload sent"
            echo "$json_payload"
            # Expose the (possibly multi-line) summary as the step output "message"
            {
                echo 'message<<EOF'
                echo "$summary"
                echo EOF
              } >> "$GITHUB_OUTPUT"
            docker logs --tail 10 local-ai
    - name: Discord notification
      env:
        DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
        DISCORD_USERNAME: "LocalAI-Bot"
        DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
      uses: Ilshidur/action-discord@master
      with:
        args: ${{ steps.summarize.outputs.message }}
    # Opens a debugging session only when an earlier step failed
    - name: Setup tmate session if fails
      if: ${{ failure() }}
      uses: mxschmitt/action-tmate@v3.18
      with:
        detached: true
        connect-timeout-seconds: 180
        limit-access-to-actor: true
  # Announces newly merged gallery models on Twitter: boots a LocalAI container,
  # asks it for a short summary of the PR diff and tweets the result.
  notify-twitter:
    # Run only for PRs that were actually merged and carry the 'area/ai-model' label.
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
    env:
        MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0 # needed to checkout all branches for this Action to work
    - name: Start LocalAI
      run: |
        echo "Starting LocalAI..."
        # Detached run. The former "-e -ti" made docker consume "-ti" as the value
        # of -e (an env var name), so it never allocated a TTY and is dropped here.
        docker run -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug "$MODEL_NAME"
        until [ "$(docker inspect -f '{{.State.Health.Status}}' local-ai)" == "healthy" ]; do
          # Bail out if the container died; otherwise this loop would spin until the job times out.
          if [ "$(docker inspect -f '{{.State.Running}}' local-ai)" != "true" ]; then
            echo "LocalAI container exited before becoming healthy"
            docker logs --tail 50 local-ai
            exit 1
          fi
          echo "Waiting for container to be ready"
          docker logs --tail 10 local-ai
          sleep 2
        done
      # Check the PR diff using the current branch and the base branch of the PR
    - uses: GrantBirki/git-diff-action@v2.7.0
      id: git-diff-action
      with:
            json_diff_file_output: diff.json
            raw_diff_file_output: diff.txt
            file_output_only: "true"
    - name: Summarize
      env:
        DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
      id: summarize
      run: |
            input="$(cat "$DIFF")"
            # Define the LocalAI API endpoint
            API_URL="http://localhost:8080/chat/completions"
            # Create a JSON payload using jq to handle special characters;
            # both the model name and the diff are passed as jq variables so
            # neither can break out of the JSON document.
            json_payload=$(jq -n --arg model "$MODEL_NAME" --arg input "$input" '{
            model: $model,
            messages: [
                {
                role: "system",
                content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
                },
                {
                role: "user",
                content: $input
                }
            ]
            }')
            # Send the request to LocalAI
            response=$(curl -s -X POST "$API_URL" \
            -H "Content-Type: application/json" \
            -d "$json_payload")
            # Extract the summary from the response. The response is quoted so the
            # shell does not word-split or glob-expand the JSON before jq sees it.
            summary="$(printf '%s' "$response" | jq -r '.choices[0].message.content')"
            # Print the summary
            #  -H "Authorization: Bearer $API_KEY" \
            echo "Summary:"
            echo "$summary"
            echo "payload sent"
            echo "$json_payload"
            # Expose the (possibly multi-line) summary as the step output "message"
            {
                echo 'message<<EOF'
                echo "$summary"
                echo EOF
              } >> "$GITHUB_OUTPUT"
            docker logs --tail 10 local-ai
    - uses: Eomm/why-don-t-you-tweet@v2
      with:
        tweet-message: ${{ steps.summarize.outputs.message }}
      env:
        # Get your tokens from https://developer.twitter.com/apps
        TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
        TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
        TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
        TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
    # Opens a debugging session only when an earlier step failed
    - name: Setup tmate session if fails
      if: ${{ failure() }}
      uses: mxschmitt/action-tmate@v3.18
      with:
        detached: true
        connect-timeout-seconds: 180
        limit-access-to-actor: true
--- a/.github/workflows/notify-releases.yaml
+++ b/.github/workflows/notify-releases.yaml
@@ -0,0 +1,65 @@
 name: Release notifications
 on:
  release:
    types:
      - published
 jobs:
  # Posts a bullet-point summary of a published release to Discord: boots a
  # LocalAI container, asks it to summarise the release notes and sends the
  # result through a webhook.
  notify-discord:
    runs-on: ubuntu-latest
    env:
        RELEASE_BODY: ${{ github.event.release.body }}
        RELEASE_TITLE: ${{ github.event.release.name }}
        RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
        # The steps below reference $MODEL_NAME; without this definition the
        # container started with no model and the request carried model "".
        MODEL_NAME: hermes-2-theta-llama-3-8b
    steps:
    - name: Start LocalAI
      run: |
        echo "Starting LocalAI..."
        # Detached run. The former "-e -ti" made docker consume "-ti" as the value
        # of -e (an env var name), so it never allocated a TTY and is dropped here.
        docker run -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug "$MODEL_NAME"
        until [ "$(docker inspect -f '{{.State.Health.Status}}' local-ai)" == "healthy" ]; do
          # Bail out if the container died; otherwise this loop would spin until the job times out.
          if [ "$(docker inspect -f '{{.State.Running}}' local-ai)" != "true" ]; then
            echo "LocalAI container exited before becoming healthy"
            docker logs --tail 50 local-ai
            exit 1
          fi
          echo "Waiting for container to be ready"
          docker logs --tail 10 local-ai
          sleep 2
        done
    - name: Summarize
      id: summarize
      run: |
            # Title and body separated by a real newline (the previous "\b" was
            # passed to the model as a literal backslash followed by "b").
            input="$(printf '%s\n%s' "$RELEASE_TITLE" "$RELEASE_BODY")"
            # Define the LocalAI API endpoint
            API_URL="http://localhost:8080/chat/completions"
            # Create a JSON payload using jq to handle special characters;
            # both the model name and the release text are passed as jq variables.
            json_payload=$(jq -n --arg model "$MODEL_NAME" --arg input "$input" '{
            model: $model,
            messages: [
                {
                role: "system",
                content: "Write a discord message with a bullet point summary of the release notes."
                },
                {
                role: "user",
                content: $input
                }
            ]
            }')
            # Send the request to LocalAI API
            response=$(curl -s -X POST "$API_URL" \
            -H "Content-Type: application/json" \
            -d "$json_payload")
            # Extract the summary from the response. The response is quoted so the
            # shell does not word-split or glob-expand the JSON before jq sees it.
            summary=$(printf '%s' "$response" | jq -r '.choices[0].message.content')
            # Print the summary
            #  -H "Authorization: Bearer $API_KEY" \
            # Expose the (possibly multi-line) summary as the step output "message"
            {
                echo 'message<<EOF'
                echo "$summary"
                echo EOF
              } >> "$GITHUB_OUTPUT"
    - name: Discord notification
      env:
        DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
        DISCORD_USERNAME: "LocalAI-Bot"
        DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
      uses: Ilshidur/action-discord@master
      with:
        args: ${{ steps.summarize.outputs.message }}
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -31,7 +31,7 @@ jobs:
      - name: Dependencies
        run: |
          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
+          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
          sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
      - name: Install CUDA Dependencies
        run: |
@@ -77,6 +77,16 @@ jobs:
            echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
            echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
          GRPC_DIR=$PWD/grpc
          # http://google.github.io/googletest/quickstart-cmake.html
          # Seems otherwise cross-arch fails to find it
          echo "include(FetchContent)" >> $GRPC_DIR/CMakeLists.txt
          echo "FetchContent_Declare(" >> $GRPC_DIR/CMakeLists.txt
          echo "  googletest" >> $GRPC_DIR/CMakeLists.txt
          echo "  URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip" >> $GRPC_DIR/CMakeLists.txt
          echo ")" >> $GRPC_DIR/CMakeLists.txt
          echo "FetchContent_MakeAvailable(googletest)" >> $GRPC_DIR/CMakeLists.txt
          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
          GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
          mkdir -p $GRPC_CROSS_BUILD_DIR && \
@@ -96,11 +106,18 @@ jobs:
          CROSS_TOOLCHAIN=/usr/$GNU_HOST
          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
          export PATH=$PATH:$GOPATH/bin
          export PATH=/usr/local/cuda/bin:$PATH
-          GO_TAGS=p2p GOOS=linux GOARCH=arm64 CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
+          sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
          sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
          sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
          BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
          GOOS=linux \
          GOARCH=arm64 \
          CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
      - uses: actions/upload-artifact@v4
        with:
          name: LocalAI-linux-arm64
@@ -111,7 +128,13 @@ jobs:
        with:
          files: |
            release/*
-
+      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
  build-linux:
    runs-on: arc-runner-set
    steps:
@@ -134,7 +157,7 @@ jobs:
      - name: Dependencies
        run: |
          sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake
      - name: Intel Dependencies
        run: |
          wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -190,6 +213,7 @@ jobs:
      - name: Install gRPC
        run: |
          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
      # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
      - name: Build
        id: build
        run: |
@@ -199,7 +223,9 @@ jobs:
          export PATH=/usr/local/cuda/bin:$PATH
          export PATH=/opt/rocm/bin:$PATH
          source /opt/intel/oneapi/setvars.sh
-          GO_TAGS=p2p make -j4 dist
+          sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
          BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
          make -j4 dist
      - uses: actions/upload-artifact@v4
        with:
          name: LocalAI-linux
@@ -210,7 +236,13 @@ jobs:
        with:
          files: |
            release/*
-
+      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
  build-stablediffusion:
    runs-on: ubuntu-latest
    steps:
@@ -246,6 +278,48 @@ jobs:
          files: |
            release/*
  # Builds the macOS x86_64 distribution with `make dist`, uploads release/ as a
  # workflow artifact and, on tag builds, attaches it to the GitHub release.
  build-macOS-x86_64:
    # NOTE(review): macos-13 is presumably the Intel runner image, matching the job name; confirm.
    runs-on: macos-13
    steps:
      # Check out the repository together with its git submodules.
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true
      - uses: actions/setup-go@v5
        with:
          go-version: '1.21.x'
          cache: false
      # Homebrew provides protobuf/grpc; the Go protoc plugins are installed at pinned versions.
      - name: Dependencies
        run: |
          brew install protobuf grpc
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
      # Adds /usr/local/include to the C/C++ include search paths and $GOPATH/bin
      # to PATH (where `go install` places the protoc plugins) before building.
      - name: Build
        id: build
        run: |
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
          export PATH=$PATH:$GOPATH/bin
          make dist
      # Always keep the contents of release/ as a workflow artifact.
      - uses: actions/upload-artifact@v4
        with:
          name: LocalAI-MacOS-x86_64
          path: release/
      # Publish the files to the GitHub release only when the workflow runs for a tag.
      - name: Release
        uses: softprops/action-gh-release@v2
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: |
            release/*
      # Opens a debugging session only when an earlier step failed.
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
  build-macOS-arm64:
    runs-on: macos-14
    steps:
@@ -268,7 +342,8 @@ jobs:
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
          export PATH=$PATH:$GOPATH/bin
-          GO_TAGS=p2p make dist
+
          make dist
      - uses: actions/upload-artifact@v4
        with:
          name: LocalAI-MacOS-arm64
@@ -279,3 +354,10 @@ jobs:
        with:
          files: |
            release/*
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -220,7 +220,7 @@ jobs:
          export CPLUS_INCLUDE_PATH=/usr/local/include
          # Used to run the newer GNUMake version from brew that supports --output-sync
          export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
+          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
--- a/26
+++ b/26
@@ -33,7 +33,7 @@ RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | ta
 ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
 # Install grpc compilers
-RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.1 && \
+RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
@@ -98,11 +98,27 @@ RUN pip install --user grpcio-tools
 FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=8
+ARG CUDA_MINOR_VERSION=4
 ENV BUILD_TYPE=${BUILD_TYPE}
 # Vulkan requirements
 # When BUILD_TYPE is "vulkan", registers the LunarG signing key and apt source
 # list for Ubuntu jammy and installs the vulkan-sdk package, then clears the apt
 # caches to keep the layer small. For any other BUILD_TYPE this step does nothing.
 # NOTE(review): apt-key is deprecated on newer Debian/Ubuntu releases; revisit
 # if the base image is moved past ubuntu:22.04.
 RUN <<EOT bash
    if [ "${BUILD_TYPE}" = "vulkan" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
                        software-properties-common pciutils wget gpg-agent && \
        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
        apt-get update && \
            apt-get install -y \
            vulkan-sdk && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/*
    fi
 EOT
 # CuBLAS requirements
 RUN <<EOT bash
    if [ "${BUILD_TYPE}" = "cublas" ]; then
@@ -266,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local
 # Rebuild with defaults backends
 WORKDIR /build
 ## Build the binary
 RUN make build
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
@@ -292,7 +310,7 @@ ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
 ENV MAKEFLAGS=${MAKEFLAGS}
-ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MAJOR_VERSION=12
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
--- a/101
+++ b/101
@@ -3,9 +3,11 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=172c8256840ffd882ab9992ecedbb587d9b21f15
+CPPLLAMA_VERSION?=c4dd11d1d3903e1922c06242e189f6310fc4d8c3
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +18,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 # whisper.cpp version
-WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f
+WHISPER_CPP_VERSION?=1c31f9d4a8936aec550e6c4dc9ca5cae3b4f304a
 # bert.cpp version
 BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
@@ -33,9 +35,11 @@ TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
 export BUILD_TYPE?=
 export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
 export CMAKE_ARGS?=
 export BACKEND_LIBS?=
 CGO_LDFLAGS?=
 CGO_LDFLAGS_WHISPER?=
 CGO_LDFLAGS_WHISPER+=-lggml
 CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
 BUILD_ID?=
@@ -49,12 +53,12 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
 LD_FLAGS?=
-override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
+override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
-override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
+override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
 OPTIONAL_TARGETS?=
-OS := $(shell uname -s)
+export OS := $(shell uname -s)
 ARCH := $(shell uname -m)
 GREEN  := $(shell tput -Txterm setaf 2)
 YELLOW := $(shell tput -Txterm setaf 3)
@@ -80,29 +84,42 @@ ifeq ($(OS),Darwin)
 		BUILD_TYPE=metal
 	# disable metal if on Darwin and any other value is explicitly passed.
 	else ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DLLAMA_METAL=OFF
+		CMAKE_ARGS+=-DGGML_METAL=OFF
-		export LLAMA_NO_ACCELERATE=1
+		export GGML_NO_ACCELERATE=1
 		export GGML_NO_METAL=1
 	endif
 	ifeq ($(BUILD_TYPE),metal)
 #			-lcblas 	removed: it seems to always be listed as a duplicate flag.
 		CGO_LDFLAGS += -framework Accelerate
 	endif
 else
 CGO_LDFLAGS_WHISPER+=-lgomp
 endif
 ifeq ($(BUILD_TYPE),openblas)
 	CGO_LDFLAGS+=-lopenblas
-	export WHISPER_OPENBLAS=1
+	export GGML_OPENBLAS=1
 endif
 ifeq ($(BUILD_TYPE),cublas)
 	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
-	export LLAMA_CUBLAS=1
+	export GGML_CUDA=1
 	export WHISPER_CUDA=1
 	CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
 endif
 ifeq ($(BUILD_TYPE),vulkan)
 	CMAKE_ARGS+=-DGGML_VULKAN=1
 endif
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	export GGML_SYCL=1
 endif
 ifeq ($(BUILD_TYPE),sycl_f16)
 	export GGML_SYCL_F16=1
 endif
 ifeq ($(BUILD_TYPE),hipblas)
 	ROCM_HOME ?= /opt/rocm
 	ROCM_PATH ?= /opt/rocm
@@ -111,27 +128,26 @@ ifeq ($(BUILD_TYPE),hipblas)
 	export CC=$(ROCM_HOME)/llvm/bin/clang
 	# llama-ggml has no hipblas support, so override it here.
 	export STABLE_BUILD_TYPE=
-	export WHISPER_HIPBLAS=1
+	export GGML_HIPBLAS=1
 	GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
 	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+	CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
 	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
 endif
 ifeq ($(BUILD_TYPE),metal)
 	CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-	export LLAMA_METAL=1
+	export GGML_METAL=1
 	export WHISPER_METAL=1
 endif
 ifeq ($(BUILD_TYPE),clblas)
 	CGO_LDFLAGS+=-lOpenCL -lclblast
-	export WHISPER_CLBLAST=1
+	export GGML_OPENBLAS=1
 endif
 # glibc-static or glibc-devel-static required
 ifeq ($(STATIC),true)
-	LD_FLAGS=-linkmode external -extldflags -static
+	LD_FLAGS+=-linkmode external -extldflags -static
 endif
 ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
@@ -165,6 +181,8 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
 ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
 ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
 ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
 # Use filter-out to remove the specified backends
 ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
 GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
 TEST_PATHS?=./api/... ./pkg/... ./core/...
@@ -244,7 +262,7 @@ sources/whisper.cpp:
 	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
-	cd sources/whisper.cpp && $(MAKE) libwhisper.a
+	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
 get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
@@ -313,6 +331,10 @@ build: prepare backend-assets grpcs ## Build the project
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
 ifneq ($(BACKEND_LIBS),)
 	$(MAKE) backend-assets/lib
 	cp -f $(BACKEND_LIBS) backend-assets/lib/
 endif
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 build-minimal:
@@ -321,8 +343,14 @@ build-minimal:
 build-api:
 	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
 backend-assets/lib:
 	mkdir -p backend-assets/lib
 dist:
-	STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
+	$(MAKE) backend-assets/grpc/llama-cpp-avx2
 ifeq ($(DETECT_LIBS),true)
 	scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
 endif
 ifeq ($(OS),Darwin)
 	$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
 else
@@ -331,7 +359,11 @@ else
 	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
 	$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
 endif
-	$(MAKE) build
+	GO_TAGS="tts p2p" $(MAKE) build
 ifeq ($(DETECT_LIBS),true)
 	scripts/prepare-libs.sh backend-assets/grpc/piper
 endif
 	GO_TAGS="tts p2p" STATIC=true $(MAKE) build
 	mkdir -p release
 # if BUILD_ID is empty, then we don't append it to the binary name
 ifeq ($(BUILD_ID),)
@@ -343,8 +375,8 @@ else
 endif
 dist-cross-linux-arm64:
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
-	$(MAKE) build
+	STATIC=true $(MAKE) build
 	mkdir -p release
 # if BUILD_ID is empty, then we don't append it to the binary name
 ifeq ($(BUILD_ID),)
@@ -393,7 +425,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -700,21 +732,21 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-avx2
 	$(MAKE) -C backend/cpp/llama-avx2 purge
 	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
 backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-avx
 	$(MAKE) -C backend/cpp/llama-avx purge
 	$(info ${GREEN}I llama-cpp build info:avx${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
 backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-fallback
 	$(MAKE) -C backend/cpp/llama-fallback purge
 	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
 # TODO: every binary should have its own folder instead, so that each can have a different metal implementation
 ifeq ($(BUILD_TYPE),metal)
@@ -725,7 +757,7 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-cuda
 	$(MAKE) -C backend/cpp/llama-cuda purge
 	$(info ${GREEN}I llama-cpp build info:cuda${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
 backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
@@ -753,7 +785,7 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-grpc
 	$(MAKE) -C backend/cpp/llama-grpc purge
 	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
 backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
@@ -781,7 +813,7 @@ backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libti
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
 backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
+	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
 backend-assets/grpc/local-store: backend-assets/grpc
@@ -803,6 +835,17 @@ docker:
 		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
 		-t $(DOCKER_IMAGE) .
 docker-cuda11:
 	docker build \
 		--build-arg CUDA_MAJOR_VERSION=11 \
 		--build-arg CUDA_MINOR_VERSION=8 \
 		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
 		-t $(DOCKER_IMAGE)-cuda11 .
 docker-aio:
 	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
 	docker build \
--- a/README.md
+++ b/README.md
@@ -48,6 +48,13 @@
 ![screen](https://github.com/mudler/LocalAI/assets/2420543/20b5ccd2-8393-44f0-aaf6-87a23806381e)
 Run the installer script:
 ```bash
 curl https://localai.io/install.sh | sh
 ```
 Or run with docker:
 ```bash
 docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 # Alternative images:
@@ -65,6 +72,8 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
 - 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
 - 🆕 You can now browse the model gallery without LocalAI! Check out https://models.localai.io
 - 🔥🔥 Decentralized llama.cpp:  https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs  https://localai.io/features/distribute/
 - 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
 - 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
@@ -96,6 +105,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
 - 📈 [Reranker API](https://localai.io/features/reranker/)
 - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
 - 🌍 Integrated WebUI!
 ## 💻 Usage
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -230,6 +230,7 @@ message TranscriptRequest {
  string dst = 2;
  string language = 3;
  uint32 threads = 4;
  bool translate = 5;
 }
 message TranscriptResult {
--- a/backend/cpp/llama/CMakeLists.txt
+++ b/backend/cpp/llama/CMakeLists.txt
@@ -84,3 +84,10 @@ target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
  add_dependencies(${TARGET} BUILD_INFO)
 endif()
 include(FetchContent)
 FetchContent_Declare(
  googletest
  URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
 )
 FetchContent_MakeAvailable(googletest)
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -4,34 +4,44 @@ LLAMA_VERSION?=
 CMAKE_ARGS?=
 BUILD_TYPE?=
 ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
 TARGET?=--target grpc-server
-# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
+# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
 # If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
+	CMAKE_ARGS+=-DGGML_CUDA=ON
-# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
 # to CMAKE_ARGS automatically
 else ifeq ($(BUILD_TYPE),openblas)
-	CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 else ifeq ($(BUILD_TYPE),clblas)
-	CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we also have to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
 else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
+	CMAKE_ARGS+=-DGGML_HIPBLAS=ON
-# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
+# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
-else ifeq ($(OS),darwin)
+else ifeq ($(OS),Darwin)
 	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DLLAMA_METAL=OFF
+		CMAKE_ARGS+=-DGGML_METAL=OFF
 	else
 		CMAKE_ARGS+=-DGGML_METAL=ON
 # Until this is tested properly, we disable embedded metal file
 # as we already embed it as part of the LocalAI assets
 		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
 		TARGET+=--target ggml-metal
 	endif
 endif
 ifeq ($(BUILD_TYPE),sycl_f16)
-	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
 endif
 ifeq ($(BUILD_TYPE),sycl_f32)
-	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 endif
 llama.cpp:
@@ -61,9 +71,9 @@ clean: purge
 grpc-server: llama.cpp llama.cpp/examples/grpc-server
 	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	bash -c "source $(ONEAPI_VARS); \
+	+bash -c "source $(ONEAPI_VARS); \
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
 else
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
+	+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
 endif
 	cp llama.cpp/build/bin/grpc-server .
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -886,6 +886,8 @@ struct llama_server_context
            {"task_id", slot->task_id},
        });
        LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
        return true;
    }
--- a/backend/go/image/stablediffusion/main.go
+++ b/backend/go/image/stablediffusion/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/image/stablediffusion/stablediffusion.go
+++ b/backend/go/image/stablediffusion/stablediffusion.go
@@ -3,9 +3,9 @@ package main
 // This is a wrapper to satisfy the GRPC service interface
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
+	"github.com/mudler/LocalAI/pkg/stablediffusion"
 )
 type Image struct {
--- a/backend/go/image/tinydream/main.go
+++ b/backend/go/image/tinydream/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/image/tinydream/tinydream.go
+++ b/backend/go/image/tinydream/tinydream.go
@@ -3,9 +3,9 @@ package main
 // This is a wrapper to satisfy the GRPC service interface
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/tinydream"
+	"github.com/mudler/LocalAI/pkg/tinydream"
 )
 type Image struct {
--- a/backend/go/llm/bert/bert.go
+++ b/backend/go/llm/bert/bert.go
@@ -5,8 +5,8 @@ package main
 import (
 	bert "github.com/go-skynet/go-bert.cpp"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )
 type Embeddings struct {
--- a/backend/go/llm/bert/main.go
+++ b/backend/go/llm/bert/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/llm/gpt4all/gpt4all.go
+++ b/backend/go/llm/gpt4all/gpt4all.go
@@ -5,8 +5,8 @@ package main
 import (
 	"fmt"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
 )
--- a/backend/go/llm/gpt4all/main.go
+++ b/backend/go/llm/gpt4all/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/llm/langchain/langchain.go
+++ b/backend/go/llm/langchain/langchain.go
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"os"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/langchain"
+	"github.com/mudler/LocalAI/pkg/langchain"
 )
 type LLM struct {
--- a/backend/go/llm/langchain/main.go
+++ b/backend/go/llm/langchain/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/llm/llama-ggml/llama.go
+++ b/backend/go/llm/llama-ggml/llama.go
@@ -5,9 +5,9 @@ package main
 import (
 	"fmt"
 	"github.com/go-skynet/LocalAI/pkg/grpc/base"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	"github.com/go-skynet/go-llama.cpp"
 	"github.com/mudler/LocalAI/pkg/grpc/base"
 	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )
 type LLM struct {
--- a/backend/go/llm/llama-ggml/main.go
+++ b/backend/go/llm/llama-ggml/main.go
@@ -3,7 +3,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/llm/llama/llama.go
+++ b/backend/go/llm/llama/llama.go
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"path/filepath"
 	"github.com/go-skynet/LocalAI/pkg/grpc/base"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	"github.com/go-skynet/go-llama.cpp"
 	"github.com/mudler/LocalAI/pkg/grpc/base"
 )
 type LLM struct {
--- a/backend/go/llm/llama/main.go
+++ b/backend/go/llm/llama/main.go
@@ -7,7 +7,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/llm/rwkv/main.go
+++ b/backend/go/llm/rwkv/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/llm/rwkv/rwkv.go
+++ b/backend/go/llm/rwkv/rwkv.go
@@ -7,8 +7,8 @@ import (
 	"path/filepath"
 	"github.com/donomii/go-rwkv.cpp"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )
 const tokenizerSuffix = ".tokenizer.json"
@@ -31,7 +31,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 	model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
 	if model == nil {
-		return fmt.Errorf("could not load model")
+		return fmt.Errorf("rwkv could not load model")
 	}
 	llm.rwkv = model
 	return nil
--- a/backend/go/stores/main.go
+++ b/backend/go/stores/main.go
@@ -6,7 +6,7 @@ import (
 	"flag"
 	"os"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
--- a/backend/go/stores/store.go
+++ b/backend/go/stores/store.go
@@ -8,8 +8,8 @@ import (
 	"math"
 	"slices"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	"github.com/rs/zerolog/log"
 )
--- a/backend/go/transcribe/main.go
+++ b/backend/go/transcribe/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/transcribe/transcript.go
+++ b/backend/go/transcribe/transcript.go
@@ -8,7 +8,7 @@ import (
 	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
 	"github.com/go-audio/wav"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
 )
 func ffmpegCommand(args []string) (string, error) {
@@ -29,7 +29,7 @@ func audioToWav(src, dst string) error {
 	return nil
 }
-func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
+func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
 	res := schema.TranscriptionResult{}
 	dir, err := os.MkdirTemp("", "whisper")
@@ -75,6 +75,10 @@ func Transcript(model whisper.Model, audiopath, language string, threads uint) (
 		context.SetLanguage("auto")
 	}
 	if translate {
 		context.SetTranslate(true)
 	}
 	if err := context.Process(data, nil, nil); err != nil {
 		return res, err
 	}
--- a/backend/go/transcribe/whisper.go
+++ b/backend/go/transcribe/whisper.go
@@ -4,9 +4,9 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )
 type Whisper struct {
@@ -22,5 +22,5 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
 }
 func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
-	return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
+	return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
 }
--- a/backend/go/tts/main.go
+++ b/backend/go/tts/main.go
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
 )
 var (
--- a/backend/go/tts/piper.go
+++ b/backend/go/tts/piper.go
@@ -7,8 +7,8 @@ import (
 	"os"
 	"path/filepath"
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	piper "github.com/mudler/go-piper"
 )
--- a/backend/python/common/libbackend.sh
+++ b/backend/python/common/libbackend.sh
@@ -148,13 +148,13 @@ function startBackend() {
    ensureVenv
    if [ ! -z ${BACKEND_FILE} ]; then
-        python ${BACKEND_FILE} $@
+        exec python ${BACKEND_FILE} $@
    elif [ -e "${MY_DIR}/server.py" ]; then
-        python ${MY_DIR}/server.py $@
+        exec python ${MY_DIR}/server.py $@
    elif [ -e "${MY_DIR}/backend.py" ]; then
-        python ${MY_DIR}/backend.py $@
+        exec python ${MY_DIR}/backend.py $@
    elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
-        python ${MY_DIR}/${BACKEND_NAME}.py $@
+        exec python ${MY_DIR}/${BACKEND_NAME}.py $@
    fi
 }
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -17,7 +17,7 @@ import backend_pb2_grpc
 import grpc
-from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
+from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image,export_to_video
@@ -225,6 +225,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                        torch_dtype=torchType, 
                        use_safetensors=True, 
                        variant=variant)
            elif request.PipelineType == "StableDiffusion3Pipeline":
                if fromSingleFile:
                    self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
                                                               torch_dtype=torchType,
                                                               use_safetensors=True)
                else:
                    self.pipe = StableDiffusion3Pipeline.from_pretrained(
                        request.Model, 
                        torch_dtype=torchType, 
                        use_safetensors=True, 
                        variant=variant)
            if CLIPSKIP and request.CLIPSkip != 0:
                self.clip_skip = request.CLIPSkip
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -5,6 +5,7 @@ grpcio==1.64.0
 opencv-python
 pillow
 protobuf
 sentencepiece
 torch
 transformers
 certifi
--- a/core/application.go
+++ b/core/application.go
@@ -1,9 +1,9 @@
 package core
 import (
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/mudler/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 )
 // The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -3,10 +3,10 @@ package backend
 import (
 	"fmt"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/grpc"
+	"github.com/mudler/LocalAI/pkg/grpc"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/mudler/LocalAI/pkg/model"
 )
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -1,10 +1,10 @@
 package backend
 import (
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/mudler/LocalAI/pkg/model"
 )
 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -9,14 +9,14 @@ import (
 	"sync"
 	"unicode/utf8"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/go-skynet/LocalAI/pkg/grpc"
+	"github.com/mudler/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/mudler/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/utils"
 )
 type LLMResponse struct {
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -5,9 +5,9 @@ import (
 	"os"
 	"path/filepath"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
@@ -142,12 +142,14 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
 		MirostatTAU:         float32(*c.LLMConfig.MirostatTAU),
 		Debug:               *c.Debug,
 		StopPrompts:         c.StopWords,
-		Repeat:              int32(c.RepeatPenalty),
+		Repeat:              int32(c.RepeatLastN),
 		FrequencyPenalty:    float32(c.FrequencyPenalty),
 		PresencePenalty:     float32(c.PresencePenalty),
 		Penalty:             float32(c.RepeatPenalty),
 		NKeep:               int32(c.Keep),
 		Batch:               int32(c.Batch),
 		IgnoreEOS:           c.IgnoreEOS,
 		Seed:                getSeed(c),
 		FrequencyPenalty:    float32(c.FrequencyPenalty),
 		MLock:               *c.MMlock,
 		MMap:                *c.MMap,
 		MainGPU:             c.MainGPU,
--- a/core/backend/rerank.go
+++ b/core/backend/rerank.go
@@ -4,9 +4,9 @@ import (
 	"context"
 	"fmt"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/mudler/LocalAI/pkg/model"
 )
 func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
--- a/core/backend/stores.go
+++ b/core/backend/stores.go
@@ -1,10 +1,10 @@
 package backend
 import (
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/grpc"
+	"github.com/mudler/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 )
 func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -4,14 +4,14 @@ import (
 	"context"
 	"fmt"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/mudler/LocalAI/pkg/model"
 )
-func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
+func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(model.WhisperBackend),
@@ -33,6 +33,7 @@ func ModelTranscription(audio, language string, ml *model.ModelLoader, backendCo
 	return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
 		Dst:       audio,
 		Language:  language,
 		Translate: translate,
 		Threads:   uint32(*backendConfig.Threads),
 	})
 }
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -6,11 +6,11 @@ import (
 	"os"
 	"path/filepath"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/mudler/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/utils"
 )
 func generateUniqueFileName(dir, baseName, ext string) string {
--- a/core/cli/cli.go
+++ b/core/cli/cli.go
@@ -1,14 +1,15 @@
 package cli
 import (
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/cli/worker"
+	"github.com/mudler/LocalAI/core/cli/worker"
 )
 var CLI struct {
 	cliContext.Context `embed:""`
 	Run        RunCMD        `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
 	Federated  FederatedCLI  `cmd:"" help:"Run LocalAI in federated mode"`
 	Models     ModelsCMD     `cmd:"" help:"Manage LocalAI models and definitions"`
 	TTS        TTSCMD        `cmd:"" help:"Convert text to speech"`
 	Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
--- a/core/cli/federated.go
+++ b/core/cli/federated.go
@@ -0,0 +1,130 @@
 package cli
 import (
 	"context"
 	"errors"
 	"fmt"
 	"io"
 	"net"
 	"time"
 	"math/rand/v2"
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/edgevpn/pkg/node"
 	"github.com/mudler/edgevpn/pkg/protocol"
 	"github.com/mudler/edgevpn/pkg/types"
 	"github.com/rs/zerolog/log"
 )
 // FederatedCLI holds the flags of the command that runs LocalAI in federated
 // mode: Address is the TCP address the local proxy listens on, and
 // Peer2PeerToken is the token handed to the P2P node and service discovery.
 type FederatedCLI struct {
 	Address        string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
 	Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
 }
 // Run creates and starts a P2P node from the configured token, starts service
 // discovery for p2p.FederatedID and then serves the proxy on f.Address.
 //
 // It returns an error if the node cannot be created or started, if service
 // discovery fails, or whatever error Proxy returns.
 func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
 	n, err := p2p.NewNode(f.Peer2PeerToken)
 	if err != nil {
 		return fmt.Errorf("creating a new node: %w", err)
 	}
 	// Report start failures with their own message so they are not mistaken
 	// for a node-creation failure.
 	if err := n.Start(context.Background()); err != nil {
 		return fmt.Errorf("starting a new node: %w", err)
 	}
 	if err := p2p.ServiceDiscoverer(context.Background(), n, f.Peer2PeerToken, p2p.FederatedID, nil); err != nil {
 		return err
 	}
 	return Proxy(context.Background(), n, f.Address, p2p.FederatedID)
 }
 // Proxy accepts TCP connections on listenAddr and forwards each of them to a
 // randomly chosen online node returned by p2p.GetAvailableNodes.
 //
 // Before serving, it registers a callback with the node's ledger that records
 // this node's peer ID under protocol.UsersLedgerKey. Connections that arrive
 // while no node is online are closed immediately instead of crashing the
 // process.
 //
 // It returns an error when the listener or the ledger cannot be obtained, and
 // "context canceled" once ctx is done (checked between accepted connections).
 func Proxy(ctx context.Context, node *node.Node, listenAddr, service string) error {
 	log.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
 	// Open local port for listening
 	l, err := net.Listen("tcp", listenAddr)
 	if err != nil {
 		log.Error().Err(err).Msg("Error listening")
 		return err
 	}
 	// Registered right away so the listener is released on every return path.
 	defer l.Close()
 	ledger, err := node.Ledger()
 	if err != nil {
 		return fmt.Errorf("getting the ledger: %w", err)
 	}
 	// Announce ourselves so nodes accepts our connection.
 	// The callback is handed to ledger.Announce together with a 10s duration
 	// (presumably the announce interval - confirm against edgevpn).
 	ledger.Announce(
 		ctx,
 		10*time.Second,
 		func() {
 			// The entry is written unconditionally; an earlier variant only
 			// wrote it when the peer ID was not already in the ledger.
 			updatedMap := map[string]interface{}{}
 			updatedMap[node.Host().ID().String()] = &types.User{
 				PeerID:    node.Host().ID().String(),
 				Timestamp: time.Now().String(),
 			}
 			ledger.Add(protocol.UsersLedgerKey, updatedMap)
 		},
 	)
 	for {
 		select {
 		case <-ctx.Done():
 			return errors.New("context canceled")
 		default:
 			log.Debug().Msg("New for connection")
 			// Listen for an incoming connection.
 			conn, err := l.Accept()
 			if err != nil {
 				fmt.Println("Error accepting: ", err.Error())
 				continue
 			}
 			// Handle connections in a new goroutine, forwarding to the p2p service
 			go func() {
 				// Release the client connection on every exit path, including
 				// the early returns below.
 				defer conn.Close()
 				// NOTE(review): the lookup uses p2p.FederatedID rather than the
 				// service argument; the caller visible in this file passes
 				// p2p.FederatedID, so both agree today.
 				var tunnelAddresses []string
 				for _, v := range p2p.GetAvailableNodes(p2p.FederatedID) {
 					if v.IsOnline() {
 						tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
 					} else {
 						log.Info().Msgf("Node %s is offline", v.ID)
 					}
 				}
 				// rand.IntN panics when its argument is 0, so bail out when
 				// there is nothing to forward to.
 				if len(tunnelAddresses) == 0 {
 					log.Error().Msg("No available nodes yet")
 					return
 				}
 				// open a TCP stream to one of the tunnels
 				// chosen randomly
 				// TODO: optimize this and track usage
 				tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
 				tunnelConn, err := net.Dial("tcp", tunnelAddr)
 				if err != nil {
 					log.Error().Err(err).Msg("Error connecting to tunnel")
 					return
 				}
 				// Deferred calls run last-in first-out: the tunnel is closed
 				// first, then the client connection.
 				defer tunnelConn.Close()
 				log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
 				// Buffered for both copiers so the second one to finish never
 				// blocks on its send after this goroutine has returned.
 				closer := make(chan struct{}, 2)
 				go copyStream(closer, tunnelConn, conn)
 				go copyStream(closer, conn, tunnelConn)
 				// The first direction to finish tears the whole proxy down.
 				<-closer
 			}()
 		}
 	}
 }
 // copyStream copies everything readable from src into dst and, once the copy
 // returns, sends one value on closer so the proxy knows this direction is
 // done. The result of the copy itself is discarded.
 func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
 	signalDone := func() {
 		// connection is closed, send signal to stop proxy
 		closer <- struct{}{}
 	}
 	defer signalDone()
 	_, _ = io.Copy(dst, src)
 }
--- a/core/cli/models.go
+++ b/core/cli/models.go
@@ -4,10 +4,12 @@ import (
 	"encoding/json"
 	"fmt"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/go-skynet/LocalAI/pkg/startup"
+	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/startup"
 	"github.com/rs/zerolog/log"
 	"github.com/schollz/progressbar/v3"
 )
@@ -33,7 +35,7 @@ type ModelsCMD struct {
 }
 func (ml *ModelsList) Run(ctx *cliContext.Context) error {
-	var galleries []gallery.Gallery
+	var galleries []config.Gallery
 	if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
 	}
@@ -53,10 +55,11 @@ func (ml *ModelsList) Run(ctx *cliContext.Context) error {
 }
 func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
-	var galleries []gallery.Gallery
+	var galleries []config.Gallery
 	if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
 	}
 	for _, modelName := range mi.ModelArgs {
 		progressBar := progressbar.NewOptions(
@@ -78,6 +81,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 			return err
 		}
 		if !downloader.LooksLikeOCI(modelName) {
 			model := gallery.FindModel(models, modelName, mi.ModelsPath)
 			if model == nil {
 				log.Error().Str("model", modelName).Msg("model not found")
@@ -85,6 +89,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 			}
 			log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
 		}
 		err = startup.InstallModels(galleries, "", mi.ModelsPath, progressCallback, modelName)
 		if err != nil {
 			return err
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -3,14 +3,16 @@ package cli
 import (
 	"context"
 	"fmt"
 	"net"
 	"os"
 	"strings"
 	"time"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/http"
+	"github.com/mudler/LocalAI/core/http"
-	"github.com/go-skynet/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/p2p"
-	"github.com/go-skynet/LocalAI/core/startup"
+	"github.com/mudler/LocalAI/core/startup"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
@@ -43,13 +45,14 @@ type RunCMD struct {
 	Address              string   `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
 	CORS                 bool     `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
 	CORSAllowOrigins     string   `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
 	LibraryPath          string   `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
 	CSRF                 bool     `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
 	UploadLimit          int      `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
 	APIKeys              []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
 	DisableWebUI         bool     `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
 	OpaqueErrors         bool     `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
 	Peer2Peer            bool     `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
-	Peer2PeerToken       string   `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
+	Peer2PeerToken       string   `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
 	ParallelRequests     bool     `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
 	SingleActiveBackend  bool     `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
 	PreloadBackendOnly   bool     `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
@@ -58,6 +61,7 @@ type RunCMD struct {
 	WatchdogIdleTimeout  string   `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
 	EnableWatchdogBusy   bool     `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
 	WatchdogBusyTimeout  string   `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
 	Federated            bool     `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
 }
 func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -80,6 +84,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithCors(r.CORS),
 		config.WithCorsAllowOrigins(r.CORSAllowOrigins),
 		config.WithCsrf(r.CSRF),
 		config.WithLibPath(r.LibraryPath),
 		config.WithThreads(r.Threads),
 		config.WithBackendAssets(ctx.BackendAssets),
 		config.WithBackendAssetsOutput(r.BackendAssetsPath),
@@ -89,9 +94,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithOpaqueErrors(r.OpaqueErrors),
 	}
 	token := ""
 	if r.Peer2Peer || r.Peer2PeerToken != "" {
 		log.Info().Msg("P2P mode enabled")
-		token := r.Peer2PeerToken
+		token = r.Peer2PeerToken
 		if token == "" {
 			// IF no token is provided, and p2p is enabled,
 			// we generate one and wait for the user to pick up the token (this is for interactive)
@@ -102,14 +108,46 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 			log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
 			fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
 			// Ask for user confirmation
 			log.Info().Msg("Press a button to proceed")
 			var input string
 			fmt.Scanln(&input)
 		}
 		opts = append(opts, config.WithP2PToken(token))
 		node, err := p2p.NewNode(token)
 		if err != nil {
 			return err
 		}
 		log.Info().Msg("Starting P2P server discovery...")
-		if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
+		if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
 			var tunnelAddresses []string
 			for _, v := range p2p.GetAvailableNodes("") {
 				if v.IsOnline() {
 					tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
 				} else {
 					log.Info().Msgf("Node %s is offline", v.ID)
 				}
 			}
 			tunnelEnvVar := strings.Join(tunnelAddresses, ",")
 			os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
 			log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
 		}); err != nil {
 			return err
 		}
 	}
 	if r.Federated {
 		_, port, err := net.SplitHostPort(r.Address)
 		if err != nil {
 			return err
 		}
 		if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil {
 			return err
 		}
 		node, err := p2p.NewNode(token)
 		if err != nil {
 			return err
 		}
 		if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil {
 			return err
 		}
 	}
--- a/core/cli/transcript.go
+++ b/core/cli/transcript.go
@@ -5,10 +5,10 @@ import (
 	"errors"
 	"fmt"
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
@@ -18,6 +18,7 @@ type TranscriptCMD struct {
 	Backend           string `short:"b" default:"whisper" help:"Backend to run the transcription model"`
 	Model             string `short:"m" required:"" help:"Model name to run the TTS"`
 	Language          string `short:"l" help:"Language of the audio file"`
 	Translate         bool   `short:"c" help:"Translate the transcription to english"`
 	Threads           int    `short:"t" default:"1" help:"Number of threads used for parallel computation"`
 	ModelsPath        string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
@@ -50,7 +51,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
 		}
 	}()
-	tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
+	tr, err := backend.ModelTranscription(t.Filename, t.Language, t.Translate, ml, c, opts)
 	if err != nil {
 		return err
 	}
--- a/core/cli/tts.go
+++ b/core/cli/tts.go
@@ -7,10 +7,10 @@ import (
 	"path/filepath"
 	"strings"
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
--- a/core/cli/util.go
+++ b/core/cli/util.go
@@ -5,7 +5,7 @@ import (
 	"github.com/rs/zerolog/log"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	gguf "github.com/thxcode/gguf-parser-go"
 )
--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -5,8 +5,9 @@ import (
 	"os"
 	"syscall"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/pkg/assets"
+	"github.com/mudler/LocalAI/pkg/assets"
 	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/rs/zerolog/log"
 )
@@ -27,17 +28,18 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
 		return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- <llama-rpc-server-args>")
 	}
 	grpcProcess := assets.ResolvePath(
 		r.BackendAssetsPath,
 		"util",
 		"llama-cpp-rpc-server",
 	)
 	args := os.Args[4:]
 	args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
 	args = append([]string{grpcProcess}, args...)
 	return syscall.Exec(
-		assets.ResolvePath(
+		grpcProcess,
-			r.BackendAssetsPath,
+		args,
 			"util",
 			"llama-cpp-rpc-server",
 		),
 		append([]string{
 			assets.ResolvePath(
 				r.BackendAssetsPath,
 				"util",
 				"llama-cpp-rpc-server",
 			)}, os.Args[4:]...),
 		os.Environ())
 }
--- a/core/cli/worker/worker_nop2p.go
+++ b/core/cli/worker/worker_nop2p.go
@@ -6,7 +6,7 @@ package worker
 import (
 	"fmt"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
 )
 type P2P struct{}
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -10,16 +10,17 @@ import (
 	"os/exec"
 	"time"
-	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
-	"github.com/go-skynet/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/p2p"
-	"github.com/go-skynet/LocalAI/pkg/assets"
+	"github.com/mudler/LocalAI/pkg/assets"
 	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/phayes/freeport"
 	"github.com/rs/zerolog/log"
 )
 type P2P struct {
 	WorkerFlags       `embed:""`
-	Token             string   `env:"LOCALAI_TOKEN,TOKEN" help:"JSON list of galleries"`
+	Token             string   `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
 	NoRunner          bool     `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
 	RunnerAddress     string   `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
 	RunnerPort        string   `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
@@ -58,7 +59,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 			p = r.RunnerPort
 		}
-		err = p2p.BindLLamaCPPWorker(context.Background(), address, p, r.Token)
+		err = p2p.ExposeService(context.Background(), address, p, r.Token, "")
 		if err != nil {
 			return err
 		}
@@ -71,13 +72,18 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 	go func() {
 		for {
 			log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
-			cmd := exec.Command(
+
-				assets.ResolvePath(
+			grpcProcess := assets.ResolvePath(
 				r.BackendAssetsPath,
 				"util",
 				"llama-cpp-rpc-server",
-				),
+			)
-				append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)...,
+
 			args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)
 			args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
 			cmd := exec.Command(
 				grpcProcess, args...,
 			)
 			cmd.Env = os.Environ()
@@ -86,14 +92,14 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 			cmd.Stdout = os.Stdout
 			if err := cmd.Start(); err != nil {
-				log.Error().Err(err).Msg("Failed to start llama-cpp-rpc-server")
+				log.Error().Any("grpcProcess", grpcProcess).Any("args", args).Err(err).Msg("Failed to start llama-cpp-rpc-server")
 			}
 			cmd.Wait()
 		}
 	}()
-	err = p2p.BindLLamaCPPWorker(context.Background(), address, fmt.Sprint(port), r.Token)
+	err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "")
 	if err != nil {
 		return err
 	}
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -6,8 +6,7 @@ import (
 	"encoding/json"
 	"time"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/go-skynet/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 )
@@ -15,6 +14,7 @@ type ApplicationConfig struct {
 	Context                             context.Context
 	ConfigFile                          string
 	ModelPath                           string
 	LibPath                             string
 	UploadLimitMB, Threads, ContextSize int
 	DisableWebUI                        bool
 	F16                                 bool
@@ -32,10 +32,11 @@ type ApplicationConfig struct {
 	CORSAllowOrigins                    string
 	ApiKeys                             []string
 	OpaqueErrors                        bool
 	P2PToken                            string
 	ModelLibraryURL string
-	Galleries []gallery.Gallery
+	Galleries []Gallery
 	BackendAssets     embed.FS
 	AssetsDestination string
@@ -95,12 +96,24 @@ func WithCsrf(b bool) AppOption {
 	}
 }
 // WithP2PToken returns an option that stores s in ApplicationConfig.P2PToken.
 func WithP2PToken(s string) AppOption {
 	setToken := func(cfg *ApplicationConfig) {
 		cfg.P2PToken = s
 	}
 	return setToken
 }
 // WithModelLibraryURL returns an option that stores url in
 // ApplicationConfig.ModelLibraryURL.
 func WithModelLibraryURL(url string) AppOption {
 	setURL := func(cfg *ApplicationConfig) {
 		cfg.ModelLibraryURL = url
 	}
 	return setURL
 }
 // WithLibPath returns an option that stores path in ApplicationConfig.LibPath.
 func WithLibPath(path string) AppOption {
 	setLibPath := func(cfg *ApplicationConfig) {
 		cfg.LibPath = path
 	}
 	return setLibPath
 }
 // EnableWatchDog is an option that sets ApplicationConfig.WatchDog to true.
 var EnableWatchDog = func(cfg *ApplicationConfig) {
 	cfg.WatchDog = true
 }
@@ -173,10 +186,10 @@ func WithBackendAssets(f embed.FS) AppOption {
 func WithStringGalleries(galls string) AppOption {
 	return func(o *ApplicationConfig) {
 		if galls == "" {
-			o.Galleries = []gallery.Gallery{}
+			o.Galleries = []Gallery{}
 			return
 		}
-		var galleries []gallery.Gallery
+		var galleries []Gallery
 		if err := json.Unmarshal([]byte(galls), &galleries); err != nil {
 			log.Error().Err(err).Msg("failed loading galleries")
 		}
@@ -184,7 +197,7 @@ func WithStringGalleries(galls string) AppOption {
 	}
 }
-func WithGalleries(galleries []gallery.Gallery) AppOption {
+func WithGalleries(galleries []Gallery) AppOption {
 	return func(o *ApplicationConfig) {
 		o.Galleries = append(o.Galleries, galleries...)
 	}
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -5,10 +5,10 @@ import (
 	"regexp"
 	"strings"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/functions"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/utils"
 )
 const (
@@ -390,10 +390,6 @@ func (c *BackendConfig) Validate() bool {
 		}
 	}
 	if c.Name == "" {
 		return false
 	}
 	if c.Backend != "" {
 		// a regex that checks that is a string name with no special characters, except '-' and '_'
 		re := regexp.MustCompile(`^[a-zA-Z0-9-_]+$`)
--- a/core/config/backend_config_loader.go
+++ b/core/config/backend_config_loader.go
@@ -11,9 +11,9 @@ import (
 	"sync"
 	"github.com/charmbracelet/glamour"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v3"
 )
--- a/core/config/backend_config_test.go
+++ b/core/config/backend_config_test.go
@@ -16,7 +16,8 @@ var _ = Describe("Test cases for config related functions", func() {
 			Expect(err).To(BeNil())
 			defer os.Remove(tmp.Name())
 			_, err = tmp.WriteString(
-				`backend: "foo-bar"
+				`backend: "../foo-bar"
 name: "foo"
 parameters:
  model: "foo-bar"`)
 			Expect(err).ToNot(HaveOccurred())
--- a/core/config/gallery.go
+++ b/core/config/gallery.go
@@ -0,0 +1,6 @@
 package config
 // Gallery describes a model gallery by its URL and a name. Both fields are
 // (de)serialized under the lowercase keys "url" and "name" in JSON and YAML.
 type Gallery struct {
 	URL  string `json:"url" yaml:"url"`
 	Name string `json:"name" yaml:"name"`
 }
--- a/core/dependencies_manager/manager.go
+++ b/core/dependencies_manager/manager.go
@@ -5,8 +5,8 @@ import (
 	"os"
 	"path/filepath"
-	"github.com/go-skynet/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/downloader"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"gopkg.in/yaml.v3"
 )
--- a/core/gallery/gallery.go
+++ b/core/gallery/gallery.go
@@ -7,19 +7,15 @@ import (
 	"path/filepath"
 	"strings"
 	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/imdario/mergo"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )
 // Gallery describes a model gallery by its URL and a name, serialized as
 // "url" and "name" in JSON and YAML.
 // NOTE(review): the functions changed in this file now take config.Gallery,
 // which has the same fields — confirm whether this declaration is still meant
 // to exist.
 type Gallery struct {
 	URL  string `json:"url" yaml:"url"`
 	Name string `json:"name" yaml:"name"`
 }
 // Installs a model from the gallery
-func InstallModelFromGallery(galleries []Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {
+func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {
 	applyModel := func(model *GalleryModel) error {
 		name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
@@ -117,7 +113,7 @@ func FindModel(models []*GalleryModel, name string, basePath string) *GalleryMod
 // List available models
 // Models galleries are a list of yaml files that are hosted on a remote server (for example github).
 // Each yaml file contains a list of models that can be downloaded and optionally overrides to define a new model setting.
-func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryModel, error) {
+func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*GalleryModel, error) {
 	var models []*GalleryModel
 	// Get models from galleries
@@ -134,7 +130,7 @@ func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryMod
 func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
 	var refFile string
-	err := downloader.GetURI(url, basePath, func(url string, d []byte) error {
+	err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error {
 		refFile = string(d)
 		if len(refFile) == 0 {
 			return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -146,7 +142,7 @@ func findGalleryURLFromReferenceURL(url string, basePath string) (string, error)
 	return refFile, err
 }
-func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error) {
+func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, error) {
 	var models []*GalleryModel = []*GalleryModel{}
 	if strings.HasSuffix(gallery.URL, ".ref") {
@@ -157,7 +153,7 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error)
 		}
 	}
-	err := downloader.GetURI(gallery.URL, basePath, func(url string, d []byte) error {
+	err := downloader.DownloadAndUnmarshal(gallery.URL, basePath, func(url string, d []byte) error {
 		return yaml.Unmarshal(d, &models)
 	})
 	if err != nil {
--- a/core/gallery/gallery_suite_test.go
+++ b/core/gallery/gallery_suite_test.go
--- a/core/gallery/models.go
+++ b/core/gallery/models.go
@@ -5,9 +5,11 @@ import (
 	"os"
 	"path/filepath"
 	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/imdario/mergo"
 	lconfig "github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )
@@ -65,7 +67,7 @@ type PromptTemplate struct {
 func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
 	var config Config
-	err := downloader.GetURI(url, basePath, func(url string, d []byte) error {
+	err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error {
 		return yaml.Unmarshal(d, &config)
 	})
 	if err != nil {
@@ -172,6 +174,15 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
 			return fmt.Errorf("failed to marshal updated config YAML: %v", err)
 		}
 		backendConfig := lconfig.BackendConfig{}
 		err = yaml.Unmarshal(updatedConfigYAML, &backendConfig)
 		if err != nil {
 			return fmt.Errorf("failed to unmarshal updated config YAML: %v", err)
 		}
 		if !backendConfig.Validate() {
 			return fmt.Errorf("failed to validate updated config YAML")
 		}
 		err = os.WriteFile(configFilePath, updatedConfigYAML, 0600)
 		if err != nil {
 			return fmt.Errorf("failed to write updated config file: %v", err)
--- a/core/gallery/models_test.go
+++ b/core/gallery/models_test.go
@@ -5,7 +5,8 @@ import (
 	"os"
 	"path/filepath"
-	. "github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/mudler/LocalAI/core/config"
 	. "github.com/mudler/LocalAI/core/gallery"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v3"
@@ -54,7 +55,7 @@ var _ = Describe("Model test", func() {
 			err = os.WriteFile(galleryFilePath, out, 0600)
 			Expect(err).ToNot(HaveOccurred())
 			Expect(filepath.IsAbs(galleryFilePath)).To(BeTrue(), galleryFilePath)
-			galleries := []Gallery{
+			galleries := []config.Gallery{
 				{
 					Name: "test",
 					URL:  "file://" + galleryFilePath,
--- a/core/gallery/op.go
+++ b/core/gallery/op.go
@@ -1,5 +1,7 @@
 package gallery
 import "github.com/mudler/LocalAI/core/config"
 type GalleryOp struct {
 	Id               string
 	GalleryModelName string
@@ -7,7 +9,7 @@ type GalleryOp struct {
 	Delete           bool
 	Req       GalleryModel
-	Galleries []Gallery
+	Galleries []config.Gallery
 }
 type GalleryOpStatus struct {
--- a/core/gallery/request.go
+++ b/core/gallery/request.go
@@ -3,6 +3,8 @@ package gallery
 import (
 	"fmt"
 	"strings"
 	"github.com/mudler/LocalAI/core/config"
 )
 // GalleryModel is the struct used to represent a model in the gallery returned by the endpoint.
@@ -23,7 +25,7 @@ type GalleryModel struct {
 	// AdditionalFiles are used to add additional files to the model
 	AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
 	// Gallery is a reference to the gallery which contains the model
-	Gallery Gallery `json:"gallery,omitempty" yaml:"gallery,omitempty"`
+	Gallery config.Gallery `json:"gallery,omitempty" yaml:"gallery,omitempty"`
 	// Installed is used to indicate if the model is installed or not
 	Installed bool `json:"installed,omitempty" yaml:"installed,omitempty"`
 }
--- a/core/gallery/request_test.go
+++ b/core/gallery/request_test.go
@@ -1,7 +1,7 @@
 package gallery_test
 import (
-	. "github.com/go-skynet/LocalAI/pkg/gallery"
+	. "github.com/mudler/LocalAI/core/gallery"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -6,16 +6,16 @@ import (
 	"net/http"
 	"strings"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/mudler/LocalAI/pkg/utils"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
+	"github.com/mudler/LocalAI/core/http/endpoints/localai"
-	"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
+	"github.com/mudler/LocalAI/core/http/endpoints/openai"
-	"github.com/go-skynet/LocalAI/core/http/routes"
+	"github.com/mudler/LocalAI/core/http/routes"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/mudler/LocalAI/core/services"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/gofiber/contrib/fiberzerolog"
 	"github.com/gofiber/fiber/v2"
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -13,15 +13,15 @@ import (
 	"path/filepath"
 	"runtime"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	. "github.com/go-skynet/LocalAI/core/http"
+	. "github.com/mudler/LocalAI/core/http"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/core/startup"
+	"github.com/mudler/LocalAI/core/startup"
 	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/model"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v3"
@@ -74,7 +74,7 @@ func getModelStatus(url string) (response map[string]interface{}) {
 func getModels(url string) (response []gallery.GalleryModel) {
 	// TODO: No tests currently seem to exercise file:// urls. Fix?
-	downloader.GetURI(url, "", func(url string, i []byte) error {
+	downloader.DownloadAndUnmarshal(url, "", func(url string, i []byte) error {
 		// Unmarshal JSON data into a struct
 		return json.Unmarshal(i, &response)
 	})
@@ -247,7 +247,7 @@ var _ = Describe("API test", func() {
 			err = os.WriteFile(filepath.Join(modelDir, "gallery_simple.yaml"), out, 0600)
 			Expect(err).ToNot(HaveOccurred())
-			galleries := []gallery.Gallery{
+			galleries := []config.Gallery{
 				{
 					Name: "test",
 					URL:  "file://" + filepath.Join(modelDir, "gallery_simple.yaml"),
@@ -603,7 +603,7 @@ var _ = Describe("API test", func() {
 			c, cancel = context.WithCancel(context.Background())
-			galleries := []gallery.Gallery{
+			galleries := []config.Gallery{
 				{
 					Name: "model-gallery",
 					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
--- a/core/http/ctx/fiber.go
+++ b/core/http/ctx/fiber.go
@@ -4,8 +4,8 @@ import (
 	"fmt"
 	"strings"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
--- a/core/http/elements/gallery.go
+++ b/core/http/elements/gallery.go
@@ -6,15 +6,24 @@ import (
 	"github.com/chasefleming/elem-go"
 	"github.com/chasefleming/elem-go/attrs"
-	"github.com/go-skynet/LocalAI/core/services"
+	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/mudler/LocalAI/core/p2p"
-	"github.com/go-skynet/LocalAI/pkg/xsync"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/pkg/xsync"
 )
 const (
 	noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
 )
 // renderElements renders every node in n and returns the rendered fragments
 // concatenated in slice order. An empty or nil slice yields the empty string.
 func renderElements(n []elem.Node) string {
 	out := ""
 	for i := range n {
 		out += n[i].Render()
 	}
 	return out
 }
 func DoneProgress(galleryID, text string, showDelete bool) string {
 	var modelName = galleryID
 	// Split by @ and grab the name
@@ -72,6 +81,135 @@ func ProgressBar(progress string) string {
 	).Render()
 }
 // P2PNodeStats renders the "<online>/<total>" worker counter for nodes as an HTML fragment.
 //
 // The first span holds a pulsing circle icon followed by the number of nodes whose
 // IsOnline() reports true; icon and span use text-green-500 when at least one node is
 // online and text-red-500 when none are. The second span holds "/<len(nodes)>" in
 // text-gray-200.
 func P2PNodeStats(nodes []p2p.NodeData) string {
 	// Count the nodes that currently report themselves as online.
 	var online int
 	for _, node := range nodes {
 		if node.IsOnline() {
 			online++
 		}
 	}
 	// Red is the fallback; switch to green as soon as a single node is online.
 	color := "text-red-500"
 	if online > 0 {
 		color = "text-green-500"
 	}
 	onlineSpan := elem.Span(
 		attrs.Props{"class": color},
 		elem.I(attrs.Props{"class": "fas fa-circle animate-pulse " + color + " ml-2 mr-1"}),
 		elem.Text(fmt.Sprintf("%d", online)),
 	)
 	totalSpan := elem.Span(
 		attrs.Props{"class": "text-gray-200"},
 		elem.Text(fmt.Sprintf("/%d", len(nodes))),
 	)
 	return renderElements([]elem.Node{onlineSpan, totalSpan})
 }
 // P2PNodeBoxes renders one card per node as an HTML fragment and returns the cards
 // concatenated in slice order.
 //
 // Each card is a div wrapping a single paragraph that contains, in order: a desktop
 // icon, the label "Name: ", the node ID, the label "Status: ", a pulsing status circle
 // and a status label. The circle and label variants are chosen with elem.If on
 // node.IsOnline(): the green circle / "Online" pair is passed as the true branch and
 // the red circle / "Offline" pair as the false branch.
 func P2PNodeBoxes(nodes []p2p.NodeData) string {
 	boxes := make([]elem.Node, 0, len(nodes))
 	for _, node := range nodes {
 		desktopIcon := elem.I(attrs.Props{"class": "fas fa-desktop text-gray-400 mr-2"})
 		nodeName := elem.Span(
 			attrs.Props{"class": "text-gray-200 font-semibold ml-2 mr-1"},
 			elem.Text(node.ID),
 		)
 		// IsOnline is queried separately for the icon and for the label, exactly as
 		// the nested form did.
 		statusIcon := elem.If(
 			node.IsOnline(),
 			elem.I(attrs.Props{"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1"}),
 			elem.I(attrs.Props{"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1"}),
 		)
 		statusLabel := elem.If(
 			node.IsOnline(),
 			elem.Span(attrs.Props{"class": "text-green-400"}, elem.Text("Online")),
 			elem.Span(attrs.Props{"class": "text-red-400"}, elem.Text("Offline")),
 		)
 		row := elem.P(
 			attrs.Props{"class": "text-sm text-gray-400 mt-2 flex"},
 			desktopIcon,
 			elem.Text("Name: "),
 			nodeName,
 			elem.Text("Status: "),
 			statusIcon,
 			statusLabel,
 		)
 		boxes = append(boxes, elem.Div(
 			attrs.Props{"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left"},
 			row,
 		))
 	}
 	return renderElements(boxes)
 }
 func StartProgressBar(uid, progress, text string) string {
 	if progress == "" {
 		progress = "0"
--- a/core/http/endpoints/elevenlabs/tts.go
+++ b/core/http/endpoints/elevenlabs/tts.go
@@ -1,13 +1,13 @@
 package elevenlabs
 import (
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/rs/zerolog/log"
 )
--- a/core/http/endpoints/jina/rerank.go
+++ b/core/http/endpoints/jina/rerank.go
@@ -1,14 +1,14 @@
 package jina
 import (
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
 	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/grpc/proto"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@@ -1,9 +1,9 @@
 package localai
 import (
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 )
 func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -5,15 +5,16 @@ import (
 	"fmt"
 	"slices"
 	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/rs/zerolog/log"
 )
 type ModelGalleryEndpointService struct {
-	galleries      []gallery.Gallery
+	galleries      []config.Gallery
 	modelPath      string
 	galleryApplier *services.GalleryService
 }
@@ -24,7 +25,7 @@ type GalleryModel struct {
 	gallery.GalleryModel
 }
-func CreateModelGalleryEndpointService(galleries []gallery.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService {
+func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService {
 	return ModelGalleryEndpointService{
 		galleries:      galleries,
 		modelPath:      modelPath,
@@ -129,12 +130,12 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib
 func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		input := new(gallery.Gallery)
+		input := new(config.Gallery)
 		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}
-		if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
+		if slices.ContainsFunc(mgs.galleries, func(gallery config.Gallery) bool {
 			return gallery.Name == input.Name
 		}) {
 			return fmt.Errorf("%s already exists", input.Name)
@@ -151,17 +152,17 @@ func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.
 func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		input := new(gallery.Gallery)
+		input := new(config.Gallery)
 		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}
-		if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
+		if !slices.ContainsFunc(mgs.galleries, func(gallery config.Gallery) bool {
 			return gallery.Name == input.Name
 		}) {
 			return fmt.Errorf("%s is not currently registered", input.Name)
 		}
-		mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
+		mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
 			return gallery.Name == input.Name
 		})
 		return c.Send(nil)
--- a/core/http/endpoints/localai/metrics.go
+++ b/core/http/endpoints/localai/metrics.go
@@ -3,9 +3,9 @@ package localai
 import (
 	"time"
 	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/adaptor"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )
--- a/core/http/endpoints/localai/stores.go
+++ b/core/http/endpoints/localai/stores.go
@@ -1,12 +1,12 @@
 package localai
 import (
 	"github.com/go-skynet/LocalAI/core/backend"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/store"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/store"
 )
 func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -1,13 +1,13 @@
 package localai
 import (
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/rs/zerolog/log"
 )
--- a/core/http/endpoints/localai/welcome.go
+++ b/core/http/endpoints/localai/welcome.go
@@ -1,11 +1,12 @@
 package localai
 import (
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/internal"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/mudler/LocalAI/pkg/model"
 )
 func WelcomeEndpoint(appConfig *config.ApplicationConfig,
@@ -33,6 +34,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
 			"Models":            models,
 			"ModelsConfig":      backendConfigs,
 			"GalleryConfig":     galleryConfigs,
 			"IsP2PEnabled":      p2p.IsP2PEnabled(),
 			"ApplicationConfig": appConfig,
 			"ProcessingModels":  processingModels,
 			"TaskTypes":         taskTypes,
--- a/core/http/endpoints/openai/assistant.go
+++ b/core/http/endpoints/openai/assistant.go
@@ -9,10 +9,10 @@ import (
 	"sync/atomic"
 	"time"
 	"github.com/go-skynet/LocalAI/core/config"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 )
@@ -339,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
 			}
 		}
-		return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find "))
+		return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find %q", assistantID))
 	}
 }
--- a/core/http/endpoints/openai/assistant_test.go
+++ b/core/http/endpoints/openai/assistant_test.go
@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net/http"
 	"net/http/httptest"
 	"os"
@@ -13,9 +12,9 @@ import (
 	"testing"
 	"time"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/stretchr/testify/assert"
 )
@@ -183,7 +182,7 @@ func TestAssistantEndpoints(t *testing.T) {
 				assert.NoError(t, err)
 				assert.Equal(t, tt.expectedStatus, response.StatusCode)
 				if tt.expectedStatus != fiber.StatusOK {
-					all, _ := ioutil.ReadAll(response.Body)
+					all, _ := io.ReadAll(response.Body)
 					assert.Equal(t, tt.expectedStringResult, string(all))
 				} else {
 					var result []Assistant
@@ -279,6 +278,7 @@ func TestAssistantEndpoints(t *testing.T) {
 		assert.NoError(t, err)
 		var getAssistant Assistant
 		err = json.NewDecoder(modifyResponse.Body).Decode(&getAssistant)
 		assert.NoError(t, err)
 		t.Cleanup(cleanupAllAssistants(t, app, []string{getAssistant.ID}))
@@ -391,7 +391,10 @@ func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId s
 	}
 	var assistantFile AssistantFile
-	all, err := ioutil.ReadAll(resp.Body)
+	all, err := io.ReadAll(resp.Body)
 	if err != nil {
 		return AssistantFile{}, resp, err
 	}
 	err = json.NewDecoder(strings.NewReader(string(all))).Decode(&assistantFile)
 	if err != nil {
 		return AssistantFile{}, resp, err
@@ -422,8 +425,7 @@ func createAssistant(app *fiber.App, ar AssistantRequest) (Assistant, *http.Resp
 	var resultAssistant Assistant
 	err = json.NewDecoder(strings.NewReader(string(bodyString))).Decode(&resultAssistant)
-
+	return resultAssistant, resp, err
 	return resultAssistant, resp, nil
 }
 func cleanupAllAssistants(t *testing.T, app *fiber.App, ids []string) func() {
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -8,13 +8,13 @@ import (
 	"strings"
 	"time"
 	"github.com/go-skynet/LocalAI/core/backend"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/functions"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -8,14 +8,14 @@ import (
 	"fmt"
 	"time"
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/functions"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@@ -5,13 +5,13 @@ import (
 	"fmt"
 	"time"
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/schema"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -5,12 +5,12 @@ import (
 	"fmt"
 	"time"
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
--- a/core/http/endpoints/openai/files.go
+++ b/core/http/endpoints/openai/files.go
@@ -8,10 +8,10 @@ import (
 	"sync/atomic"
 	"time"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/pkg/utils"
 )
 var UploadedFiles []File
@@ -123,7 +123,10 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
 	return nil, fmt.Errorf("unable to find file id %s", id)
 }
-// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
+// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
 // @Summary Returns information about a specific file.
 // @Success 200 {object} File "Response"
 // @Router /v1/files/{file_id} [get]
 func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
@@ -135,14 +138,18 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
 	}
 }
 // DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
 func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 // DeleteStatus is the object named as the 200 response in the swagger annotation of
 // DeleteFilesEndpoint (DELETE /v1/files/{file_id}).
 //
 // NOTE(review): the fields carry no json tags, so they are marshalled under their Go
 // names (Id, Object, Deleted) — confirm that matches what the handler actually returns.
 // Id presumably holds the deleted file's identifier, Object the object type string and
 // Deleted whether the removal succeeded; verify against the handler body.
 type DeleteStatus struct {
 	Id      string
 	Object  string
 	Deleted bool
 }
 // DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
 // @Summary Delete a file.
 // @Success 200 {object} DeleteStatus "Response"
 // @Router /v1/files/{file_id} [delete]
 func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
 		if err != nil {
@@ -174,7 +181,11 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
 	}
 }
-// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
+// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
 // @Summary Returns the contents of the specified file.
 // @Success	200		{string}	binary				"file"
 // @Router /v1/files/{file_id}/content [get]
 // GetFilesContentsEndpoint
 func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
--- a/core/http/endpoints/openai/files_test.go
+++ b/core/http/endpoints/openai/files_test.go
@@ -13,10 +13,10 @@ import (
 	"github.com/rs/zerolog/log"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
 	utils2 "github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
 	utils2 "github.com/mudler/LocalAI/pkg/utils"
 	"github.com/stretchr/testify/assert"
 	"testing"
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -13,14 +13,14 @@ import (
 	"strings"
 	"time"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
--- a/core/http/endpoints/openai/inference.go
+++ b/core/http/endpoints/openai/inference.go
@@ -1,11 +1,11 @@
 package openai
 import (
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/schema"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/mudler/LocalAI/pkg/model"
 )
 func ComputeChoices(
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@@ -1,11 +1,15 @@
 package openai
 import (
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/core/services"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 )
 // ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
 // @Summary List and describe the various models available in the API.
 // @Success 200 {object} schema.ModelsDataResponse "Response"
 // @Router /v1/models [get]
 func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		// If blank, no filter is applied.
@@ -18,10 +22,7 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
 		if err != nil {
 			return err
 		}
-		return c.JSON(struct {
+		return c.JSON(schema.ModelsDataResponse{
 			Object string               `json:"object"`
 			Data   []schema.OpenAIModel `json:"data"`
 		}{
 			Object: "list",
 			Data:   dataModels,
 		})
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -2,19 +2,16 @@ package openai
 import (
 	"context"
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
 	"strings"
 	"github.com/go-skynet/LocalAI/core/config"
 	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/functions"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
 	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 )
@@ -39,41 +36,6 @@ func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfi
 	return modelFile, input, err
 }
 // getBase64Image turns an image reference into a bare base64 string.
 //
 // Two input forms are accepted:
 //   - a string starting with "http": the resource is downloaded into memory with
 //     http.Get and its bytes are encoded with base64.StdEncoding. A transport error,
 //     a non-2xx response status or a body read error is returned as an error.
 //   - a data URI starting with "data:image/jpeg;base64," or "data:image/png;base64,":
 //     only that leading prefix is removed and the remainder is returned as-is (the
 //     payload is not validated as base64).
 //
 // Any other input yields the error "not valid string".
 func getBase64Image(s string) (string, error) {
 	if strings.HasPrefix(s, "http") {
 		// download the image
 		resp, err := http.Get(s)
 		if err != nil {
 			return "", err
 		}
 		defer resp.Body.Close()
 		// A non-2xx response body is an error page, not image data; encoding it would
 		// silently hand garbage to the model backend.
 		if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
 			return "", fmt.Errorf("downloading image: unexpected status %q", resp.Status)
 		}
 		// read the image data into memory
 		data, err := io.ReadAll(resp.Body)
 		if err != nil {
 			return "", err
 		}
 		return base64.StdEncoding.EncodeToString(data), nil
 	}
 	// Otherwise accept a data URI and drop its "data:image/...;base64," header.
 	for _, prefix := range []string{"data:image/jpeg;base64,", "data:image/png;base64,"} {
 		if strings.HasPrefix(s, prefix) {
 			// TrimPrefix removes only the leading header; the payload is left untouched.
 			return strings.TrimPrefix(s, prefix), nil
 		}
 	}
 	return "", fmt.Errorf("not valid string")
 }
 func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
@@ -187,7 +149,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 					input.Messages[i].StringContent = pp.Text
 				} else if pp.Type == "image_url" {
 					// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
-					base64, err := getBase64Image(pp.ImageURL.URL)
+					base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
 					if err == nil {
 						input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
 						// set a placeholder for each image
@@ -295,5 +257,9 @@ func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *c
 	// Set the parameters for the language model prediction
 	updateRequestConfig(cfg, input)
 	if !cfg.Validate() {
 		return nil, nil, fmt.Errorf("failed to validate config")
 	}
 	return cfg, input, err
 }
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -8,9 +8,9 @@ import (
 	"path"
 	"path/filepath"
-	"github.com/go-skynet/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/config"
-	model "github.com/go-skynet/LocalAI/pkg/model"
+	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
@@ -65,7 +65,7 @@ func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 		log.Debug().Msgf("Audio file copied to: %+v", dst)
-		tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
+		tr, err := backend.ModelTranscription(dst, input.Language, input.Translate, ml, *config, appConfig)
 		if err != nil {
 			return err
 		}
--- a/core/http/render.go
+++ b/core/http/render.go
@@ -7,10 +7,10 @@ import (
 	"net/http"
 	"github.com/Masterminds/sprig/v3"
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/gofiber/fiber/v2"
 	fiberhtml "github.com/gofiber/template/html/v2"
 	"github.com/microcosm-cc/bluemonday"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/russross/blackfriday"
 )
@@ -21,14 +21,13 @@ func notFoundHandler(c *fiber.Ctx) error {
 	// Check if the request accepts JSON
 	if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
 		// The client expects a JSON response
-		c.Status(fiber.StatusNotFound).JSON(schema.ErrorResponse{
+		return c.Status(fiber.StatusNotFound).JSON(schema.ErrorResponse{
 			Error: &schema.APIError{Message: "Resource not found", Code: fiber.StatusNotFound},
 		})
 	} else {
 		// The client expects an HTML response
-		c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
+		return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
 	}
 	return nil
 }
 func renderEngine() *fiberhtml.Engine {
--- a/Show More
+++ b/Show More