Compare commits

...

56 Commits

Author SHA1 Message Date
Dave
2c97440be5 fix: newline in virtual.yaml
Stupid one line fix, but it will fix CI

Signed-off-by: Dave <dave@gray101.com>
2024-04-25 10:39:07 -04:00
LocalAI [bot]
4ae4e44506 feat(swagger): update swagger (#2128)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-04-25 16:10:08 +02:00
Ettore Di Giacinto
2ada13b1ad models(gallery): add more models (#2129)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-25 16:06:18 +02:00
Ettore Di Giacinto
5d170e9264 Update yaml-check.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-04-25 16:05:02 +02:00
Ettore Di Giacinto
1b0a64aa46 Update yaml-check.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-04-25 15:57:06 +02:00
Ettore Di Giacinto
aa8e1c63d5 Create yaml-check.yml
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-04-25 15:52:52 +02:00
Ettore Di Giacinto
60690c9fc4 ci: add swagger pipeline
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-25 15:11:01 +02:00
dependabot[bot]
758b0c9042 build(deps): bump pydantic from 1.10.7 to 1.10.13 in /examples/langchain/langchainpy-localai-example in the pip group across 1 directory (#2125)
build(deps): bump pydantic

Bumps the pip group with 1 update in the /examples/langchain/langchainpy-localai-example directory: [pydantic](https://github.com/pydantic/pydantic).


Updates `pydantic` from 1.10.7 to 1.10.13
- [Release notes](https://github.com/pydantic/pydantic/releases)
- [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md)
- [Commits](https://github.com/pydantic/pydantic/compare/v1.10.7...v1.10.13)

---
updated-dependencies:
- dependency-name: pydantic
  dependency-type: direct:production
  dependency-group: pip
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-04-25 06:49:29 +00:00
Ettore Di Giacinto
48d0aa2f6d models(gallery): add new models to the gallery (#2124)
* models: add reranker and parler-tts-mini

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: chatml im_end should not have a newline

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* models(noromaid): add

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* models(llama3): add 70b, add dolphin2.9

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* models(llama3): add unholy-8b

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* models(llama3): add therapyllama3, aura

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-25 01:28:02 +02:00
Ettore Di Giacinto
b664edde29 feat(rerankers): Add new backend, support jina rerankers API (#2121)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-25 00:19:02 +02:00
LocalAI [bot]
e16658b7ec ⬆️ Update ggerganov/llama.cpp (#2123)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-04-24 22:00:17 +00:00
LocalAI [bot]
d30280ed23 ⬆️ Update ggerganov/whisper.cpp (#2122)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-04-24 21:55:30 +00:00
Ettore Di Giacinto
9dbd217c59 docs(integrations): add Wave terminal
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-04-24 19:56:51 +02:00
Ettore Di Giacinto
23eac98b3c docs: update hot topics
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-04-24 19:43:07 +02:00
Ettore Di Giacinto
4fffc47e77 deps(llama.cpp): update, use better model for function call tests (#2119)
deps(llama.cpp): update

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-24 18:44:04 +02:00
LocalAI [bot]
d65214a234 ⬆️ Update docs version mudler/LocalAI (#2113)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-04-24 11:11:41 +02:00
jtwolfe
2fb34b00b5 Include the OpenCV package for the diffusers utils (#2115)
* Update diffusers.yml

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update diffusers-rocm.yml

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

---------

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>
2024-04-24 09:17:49 +02:00
fakezeta
f718a391c0 fix missing TrustRemoteCode in OpenVINO model load (#2114) 2024-04-24 00:45:37 +00:00
Ettore Di Giacinto
ac56ac2b2d fix(gallery): show a fake image if there is no icon (#2111)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-23 20:10:58 +02:00
Ettore Di Giacinto
34c3f563fd fix(gallery): fixup dreamshaper icon
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-23 20:05:59 +02:00
Ettore Di Giacinto
d2bea6f9e3 fix(gallery): fixup hermes q8 entry
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-23 20:01:56 +02:00
Ettore Di Giacinto
a09fe1b9ba fix(gallery): set margin for images
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-23 20:00:20 +02:00
Ettore Di Giacinto
55778b35ff fix(gallery): move metadata where it belongs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-23 19:47:45 +02:00
Ettore Di Giacinto
8b169f1dac feat(gallery): add llama3, hermes, phi-3, and others (#2110)
Also adds embeddings and llava models

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-23 19:35:45 +02:00
Ettore Di Giacinto
d344daf129 feat(models-ui): minor visual enhancements (#2109)
Show image if present, URL, tags, and better display buttons

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-23 18:43:25 +02:00
cryptk
3411e072ca Fix: clean up SonarQube findings (#2106)
* fix: update dockerignore and gitignore to exclude sonarqube work dir

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>

* fix: remove useless equality check

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>

* fix: use sonarqube Dockerfile recommendations

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>

---------

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
2024-04-23 18:43:00 +02:00
fakezeta
8e36fe9b6f Transformers Backend: max_tokens adherence to OpenAI API (#2108)
Max tokens adherence to the OpenAI API

Improve adherence to the OpenAI API when `max_tokens` is omitted or equal to 0 in the request (a minimal sketch follows this entry).
2024-04-23 18:42:17 +02:00
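The change above can be illustrated with a minimal sketch (the helper name and default value below are assumptions for illustration, not the actual LocalAI transformers backend code): a missing or zero `max_tokens` falls back to a server-side default instead of producing an empty completion.

```python
# Hypothetical illustration; names and the default value are assumptions, not LocalAI code.
DEFAULT_MAX_NEW_TOKENS = 512  # assumed server-side default

def resolve_max_tokens(requested: int | None) -> int:
    """Treat a missing or zero max_tokens as 'use the default', per OpenAI API conventions."""
    if requested is None or requested <= 0:
        return DEFAULT_MAX_NEW_TOKENS
    return requested

assert resolve_max_tokens(None) == DEFAULT_MAX_NEW_TOKENS
assert resolve_max_tokens(0) == DEFAULT_MAX_NEW_TOKENS
assert resolve_max_tokens(128) == 128
```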
Ettore Di Giacinto
0d8bf91699 feat: Galleries UI (#2104)
* WIP: add models to webui

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Register routes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: don't cache models

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* small fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: fixup multiple installs (strings.Clone)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-23 09:22:58 +02:00
LocalAI [bot]
bd507678be ⬆️ Update docs version mudler/LocalAI (#2105)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-04-22 22:04:57 +00:00
Taikono-Himazin
b6f0e80d54 Update text-generation.md (#2095)
Signed-off-by: Taikono-Himazin <kazu@po.harenet.ne.jp>
2024-04-22 16:37:13 +02:00
jtwolfe
729378ca98 AMD/ROCm Documentation update + formatting fix (#2100)
* Update aio-images.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update aio-images.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update aio-images.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update GPU-acceleration.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update GPU-acceleration.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update GPU-acceleration.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update GPU-acceleration.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update GPU-acceleration.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

* Update GPU-acceleration.md

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>

---------

Signed-off-by: jtwolfe <jamie.t.wolfe@gmail.com>
2024-04-22 15:47:51 +02:00
Ikko Eltociear Ashimine
220958a87c fix: typo in models.go (#2099) 2024-04-22 04:34:59 +00:00
Ettore Di Giacinto
f3f6535aad fix: rename fiber entrypoint from http/api to http/app (#2096)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Dave <dave@gray101.com>
2024-04-21 22:39:28 +02:00
Dave
228bc4903f fix: action-tmate detached (#2092)
connect-timeout-seconds works best with `detached: true`

Signed-off-by: Dave <dave@gray101.com>
2024-04-21 22:39:17 +02:00
LocalAI [bot]
38c9abed8b ⬆️ Update ggerganov/llama.cpp (#2089)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-04-21 16:35:30 +00:00
fakezeta
66b002458d Transformer Backend: Implementing use_tokenizer_template and stop_prompts options (#2090)
* fix regression #1971

fixes regression #1971 introduced by intel_extension_for_transformers==1.4

* UseTokenizerTemplate and StopPrompt

Implementation of the use_tokenizer_template and stop-words options (see the sketch after this entry)
2024-04-21 16:20:25 +00:00
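A rough sketch of what these two options mean for a transformers-based backend, using the Hugging Face API (the `StopOnStrings` class and the model name are illustrative assumptions, not the backend's actual implementation): `use_tokenizer_template` builds the prompt from the tokenizer's own chat template, and the stop words end generation once a configured string appears in the output.

```python
# Illustrative sketch only; StopOnStrings and the model name are assumptions, not LocalAI code.
from transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList

class StopOnStrings(StoppingCriteria):
    """Stop generation as soon as any configured stop string appears in the decoded output."""
    def __init__(self, tokenizer, stop_strings):
        self.tokenizer = tokenizer
        self.stop_strings = stop_strings

    def __call__(self, input_ids, scores, **kwargs):
        text = self.tokenizer.decode(input_ids[0], skip_special_tokens=False)
        return any(s in text for s in self.stop_strings)

tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Mistral-7B")  # example model
messages = [{"role": "user", "content": "Hello!"}]

# use_tokenizer_template: let the tokenizer's chat template format the prompt
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# stop prompts / stop words: passed to model.generate(..., stopping_criteria=stopping)
stopping = StoppingCriteriaList([StopOnStrings(tokenizer, ["<|im_end|>"])])
```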
Ettore Di Giacinto
39814cab32 Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-04-21 16:46:13 +02:00
Ettore Di Giacinto
180cd4ccda fix(llama.cpp-ggml): fixup max_tokens for old backend (#2094)
fix(llama.cpp-ggml): set 0 as default for `max_tokens`

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-21 16:34:00 +02:00
Ettore Di Giacinto
284ad026b1 refactor(routes): split routes registration (#2077)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-21 01:19:57 +02:00
Ettore Di Giacinto
afa1bca1e3 fix(llama.cpp): set -1 as default for max tokens (#2087)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-20 20:20:10 +02:00
Taikono-Himazin
03adc1f60d Add tensor_parallel_size setting to the vLLM settings (#2085)
Signed-off-by: Taikono-Himazin <kazu@po.harenet.ne.jp>
2024-04-20 14:37:02 +00:00
Ettore Di Giacinto
b319ed58b0 models(gallery): add gallery (#2078)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-20 15:22:54 +02:00
cryptk
8d30b39811 feat: fiber logs with zerolog and add trace level (#2082)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
2024-04-20 10:43:37 +02:00
Dave
1038f7469c fix: action-tmate: use connect-timeout-seconds and limit-access-to-actor (#2083)
fix for action-tmate: connect-timeout-seconds and limit-access-to-actor

Signed-off-by: Dave Lee <dave@gray101.com>
2024-04-20 08:42:02 +00:00
cryptk
b9e7708643 feat: enable polling configs for systems with broken fsnotify (docker volumes on windows) (#2081)
* feat: enable polling configs for systems with broken fsnotify (docker volumes on windows)

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>

* fix: update logging to make it clear that the config file is being polled

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>

---------

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
2024-04-19 19:31:15 -05:00
LocalAI [bot]
1e37101930 ⬆️ Update ggerganov/llama.cpp (#2080)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-04-20 00:05:16 +00:00
Ettore Di Giacinto
b2772509b4 models(llama3): add llama3 to embedded models (#2074)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-19 18:23:44 +02:00
Ettore Di Giacinto
27ec84827c refactor(template): isolate and add tests (#2069)
* refactor(template): isolate and add tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Dave <dave@gray101.com>
Co-authored-by: Dave <dave@gray101.com>
2024-04-19 02:40:18 +00:00
cryptk
852316c5a6 fix: move the GRPC cache generation workflow into its own concurrency group (#2071)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
2024-04-18 20:52:34 -04:00
LocalAI [bot]
e9448005a5 ⬆️ Update ggerganov/llama.cpp (#2051)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-04-18 21:30:55 +00:00
Ettore Di Giacinto
bbea62b907 feat(functions): support models with no grammar, add tests (#2068)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-18 22:43:12 +02:00
cryptk
13012cfa70 feat: better control of GRPC docker cache (#2070)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
2024-04-18 16:19:36 -04:00
dependabot[bot]
8f2681f904 build(deps): bump aiohttp from 3.9.2 to 3.9.4 in /examples/langchain/langchainpy-localai-example in the pip group across 1 directory (#2067)
build(deps): bump aiohttp

Bumps the pip group with 1 update in the /examples/langchain/langchainpy-localai-example directory: [aiohttp](https://github.com/aio-libs/aiohttp).


Updates `aiohttp` from 3.9.2 to 3.9.4
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.2...v3.9.4)

---
updated-dependencies:
- dependency-name: aiohttp
  dependency-type: direct:production
  dependency-group: pip
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-04-18 17:17:33 +00:00
Ettore Di Giacinto
f9c75d4878 tests: add template tests (#2063)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-04-18 10:57:24 +02:00
cryptk
502c1eedaa feat: refactor the dynamic json configs for api_keys and external_backends (#2055)
* feat: refactor the dynamic json configs for api_keys and external_backends

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>

* fix: remove commented code

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>

---------

Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-04-18 03:21:55 +00:00
cryptk
e9f090257c fix: adjust some source names to match the naming of their repositories (#2061)
Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com>
2024-04-18 01:59:05 +00:00
124 changed files with 4703 additions and 869 deletions


@@ -5,4 +5,7 @@ models
examples/chatbot-ui/models
examples/rwkv/models
examples/**/models
Dockerfile*
Dockerfile*
# SonarQube
.scannerwork


@@ -0,0 +1,90 @@
name: 'generate and publish GRPC docker caches'
on:
- workflow_dispatch
concurrency:
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
generate_caches:
strategy:
matrix:
include:
- grpc-base-image: ubuntu:22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
steps:
- name: Release space from worker
if: matrix.runs-on == 'ubuntu-latest'
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y firefox || true
sudo apt-get remove -y powershell || true
sudo apt-get remove -y r-base-core || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v4
- name: Cache GRPC
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
# This means that even the MAKEFLAGS have to be an EXACT match.
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
build-args: |
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.58.0
context: .
file: ./Dockerfile
cache-to: type=gha,ignore-error=true
target: grpc
platforms: ${{ matrix.platforms }}
push: false


@@ -22,6 +22,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -61,12 +62,14 @@ jobs:
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -85,6 +88,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -102,11 +106,12 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=5 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
@@ -122,4 +127,4 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=5 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"


@@ -26,6 +26,7 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
@@ -129,6 +130,7 @@ jobs:
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
@@ -140,12 +142,14 @@ jobs:
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -158,6 +162,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
@@ -171,6 +176,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-core'
ffmpeg: 'false'
image-type: 'core'
@@ -180,6 +186,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-core'
ffmpeg: 'false'
image-type: 'core'
@@ -189,6 +196,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
@@ -198,6 +206,7 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-sycl-f32-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
@@ -210,6 +219,7 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
@@ -219,6 +229,7 @@ jobs:
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
@@ -236,6 +247,7 @@ jobs:
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
@@ -258,7 +270,7 @@ jobs:
aio: "-aio-cpu"
latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=5 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -269,7 +281,7 @@ jobs:
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=5 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -280,7 +292,7 @@ jobs:
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=5 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -291,7 +303,7 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=5 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "1"
@@ -302,4 +314,4 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=5 --output-sync=target"
makeflags: "--jobs=4 --output-sync=target"


@@ -6,6 +6,10 @@ on:
inputs:
base-image:
description: 'Base image'
required: true
type: string
grpc-base-image:
description: 'GRPC Base image, must be a compatible image with base-image'
required: false
default: ''
type: string
@@ -57,7 +61,7 @@ on:
makeflags:
description: 'Make Flags'
required: false
default: '--jobs=3 --output-sync=target'
default: '--jobs=4 --output-sync=target'
type: string
aio:
description: 'AIO Image Name'
@@ -201,15 +205,16 @@ jobs:
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
# This means that even the MAKEFLAGS have to be an EXACT match.
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
build-args: |
IMAGE_TYPE=${{ inputs.image-type }}
BASE_IMAGE=${{ inputs.base-image }}
MAKEFLAGS=${{ inputs.makeflags }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.58.0
context: .
file: ./Dockerfile
cache-from: type=gha
cache-to: type=gha,ignore-error=true
target: grpc
platforms: ${{ inputs.platforms }}
push: false


@@ -74,6 +74,37 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
tests-rerankers:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user grpcio-tools
sudo rm -rfv /usr/bin/conda || true
- name: Test rerankers
run: |
export PATH=$PATH:/opt/conda/bin
make --jobs=5 --output-sync=target -C backend/python/rerankers
make --jobs=5 --output-sync=target -C backend/python/rerankers test
tests-diffusers:
runs-on: ubuntu-latest
steps:


@@ -121,8 +121,11 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
timeout-minutes: 5
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-aio-container:
runs-on: ubuntu-latest
@@ -173,8 +176,11 @@ jobs:
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
timeout-minutes: 5
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-apple:
runs-on: macOS-14
@@ -207,5 +213,8 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
timeout-minutes: 5
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

.github/workflows/update_swagger.yaml (new file)

@@ -0,0 +1,31 @@
name: Update swagger
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
swagger:
strategy:
fail-fast: false
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 'stable'
- run: |
go install github.com/swaggo/swag/cmd/swag@latest
- name: Bump swagger 🔧
run: |
make swagger
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: 'feat(swagger): update swagger'
title: 'feat(swagger): update swagger'
branch: "update/swagger"
body: Update swagger
signoff: true

.github/workflows/yaml-check.yml (new file)

@@ -0,0 +1,18 @@
name: 'Yamllint GitHub Actions'
on:
- pull_request
jobs:
yamllint:
name: 'Yamllint'
runs-on: ubuntu-latest
steps:
- name: 'Checkout'
uses: actions/checkout@master
- name: 'Yamllint'
uses: karancode/yamllint-github-action@master
with:
yamllint_file_or_dir: 'gallery'
yamllint_strict: false
yamllint_comment: true
env:
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore

@@ -44,3 +44,6 @@ prepare
*.pb.go
*pb2.py
*pb2_grpc.py
# SonarQube
.scannerwork


@@ -1,8 +1,9 @@
ARG IMAGE_TYPE=extras
ARG BASE_IMAGE=ubuntu:22.04
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
# extras or core
FROM ${BASE_IMAGE} as requirements-core
FROM ${BASE_IMAGE} AS requirements-core
USER root
@@ -15,7 +16,7 @@ ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ARG GO_TAGS="stablediffusion tinydream tts"
@@ -23,7 +24,7 @@ RUN apt-get update && \
apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean
# Install Go
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/usr/local/go/bin
# Install grpc compilers
@@ -79,7 +80,7 @@ RUN test -n "$TARGETARCH" \
###################################
###################################
FROM requirements-core as requirements-extras
FROM requirements-core AS requirements-extras
RUN apt install -y gpg && \
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
@@ -104,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \
###################################
###################################
FROM ${BASE_IMAGE} as grpc
FROM ${GRPC_BASE_IMAGE} AS grpc
ARG MAKEFLAGS
ARG GRPC_VERSION=v1.58.0
@@ -120,16 +121,15 @@ RUN apt-get update && \
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
RUN cd grpc && \
mkdir -p cmake/build && \
cd cmake/build && \
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
WORKDIR /build/grpc/cmake/build
RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
make
###################################
###################################
FROM requirements-${IMAGE_TYPE} as builder
FROM requirements-${IMAGE_TYPE} AS builder
ARG GO_TAGS="stablediffusion tts"
ARG GRPC_BACKENDS
@@ -167,9 +167,11 @@ RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
COPY --from=grpc /build/grpc ./grpc/
RUN cd /build/grpc/cmake/build && make install
WORKDIR /build/grpc/cmake/build
RUN make install
# Rebuild with defaults backends
WORKDIR /build
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
@@ -257,6 +259,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/sentencetransformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/rerankers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/transformers \
; fi
@@ -287,7 +292,7 @@ RUN mkdir -p /build/models
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
VOLUME /build/models
EXPOSE 8080


@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60
CPPLLAMA_VERSION?=784e11dea1f5ce9638851b2b0dddb107e2a609c8
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_CPP_VERSION?=b0c3cbf2e851cf232e432b590dcc514a689ec028
WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
@@ -179,20 +179,20 @@ endif
all: help
## BERT embeddings
sources/go-bert:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
sources/go-bert.cpp:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
sources/go-bert/libgobert.a: sources/go-bert
$(MAKE) -C sources/go-bert libgobert.a
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a
## go-llama-ggml
sources/go-llama-ggml:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
## go-llama.cpp
sources/go-llama.cpp:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## go-piper
sources/go-piper:
@@ -211,12 +211,12 @@ sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV
sources/go-rwkv:
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv.cpp:
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv/librwkv.a: sources/go-rwkv
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion
sources/go-stable-diffusion:
@@ -236,23 +236,24 @@ sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
## whisper
sources/whisper.cpp:
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && make libwhisper.a
get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
@@ -271,12 +272,12 @@ prepare-sources: get-sources replace
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama-ggml clean
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C sources/go-rwkv clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
@@ -436,10 +437,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -505,6 +506,14 @@ petals-protogen:
petals-protogen-clean:
$(MAKE) -C backend/python/petals protogen-clean
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
.PHONY: rerankers-protogen-clean
rerankers-protogen-clean:
$(MAKE) -C backend/python/rerankers protogen-clean
.PHONY: sentencetransformers-protogen
sentencetransformers-protogen:
$(MAKE) -C backend/python/sentencetransformers protogen
@@ -563,6 +572,7 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/sentencetransformers
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/transformers-musicgen
$(MAKE) -C backend/python/parler-tts
@@ -598,8 +608,8 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc
backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
@@ -641,17 +651,16 @@ ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
@@ -714,4 +723,4 @@ docker-image-intel-xpu:
.PHONY: swagger
swagger:
swag init -g core/http/api.go --output swagger
swag init -g core/http/app.go --output swagger


@@ -44,18 +44,19 @@
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic...) API specifications for local AI inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU.
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic...) API specifications for local AI inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
## 🔥🔥 Hot topics / Roadmap
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
- llama3: https://github.com/mudler/LocalAI/discussions/2076
- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
- Landing page: https://github.com/mudler/LocalAI/pull/1922
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
- Vector store: https://github.com/mudler/LocalAI/pull/1795
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715
Hot topics (looking for contributors):
- Backends v2: https://github.com/mudler/LocalAI/issues/1126

aio/cpu/rerank.yaml (new file)

@@ -0,0 +1,27 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'
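For reference, the same request as the curl example above can be issued from Python; this sketch assumes a LocalAI instance listening on localhost:8080 with this model configured and uses the third-party `requests` library.

```python
# Assumes LocalAI is running locally with the jina-reranker-v1-base-en config above.
import requests

payload = {
    "model": "jina-reranker-v1-base-en",
    "query": "Organic skincare products for sensitive skin",
    "documents": [
        "Eco-friendly kitchenware for modern homes",
        "Natural organic skincare range for sensitive skin",
        "Yoga mats made from recycled materials",
    ],
    "top_n": 2,
}
resp = requests.post("http://localhost:8080/v1/rerank", json=payload, timeout=60)
print(resp.json())
```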


@@ -6,15 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}<tool_call>{{end}}
{{- if eq .RoleName "tool" }}<tool_result>{{end }}
{{- if .Content}}
{{.Content}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
{{- if .FunctionCall }}</tool_call>{{end }}
{{- if eq .RoleName "tool" }}</tool_result>{{end }}
<|im_end|>
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -29,8 +36,7 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>


@@ -129,7 +129,7 @@ detect_gpu
detect_gpu_size
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars

aio/gpu-8g/rerank.yaml (new file)

@@ -0,0 +1,27 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -6,15 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}<tool_call>{{end}}
{{- if eq .RoleName "tool" }}<tool_result>{{end }}
{{- if .Content}}
{{.Content}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
{{- if .FunctionCall }}</tool_call>{{end }}
{{- if eq .RoleName "tool" }}</tool_result>{{end }}
<|im_end|>
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -29,8 +36,7 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>

aio/intel/rerank.yaml (new file)

@@ -0,0 +1,27 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'


@@ -7,15 +7,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}<tool_call>{{end}}
{{- if eq .RoleName "tool" }}<tool_result>{{end }}
{{- if .Content}}
{{.Content}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
{{- if .FunctionCall }}</tool_call>{{end }}
{{- if eq .RoleName "tool" }}</tool_result>{{end }}
<|im_end|>
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -30,8 +37,7 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>


@@ -23,6 +23,30 @@ service Backend {
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
rpc Rerank(RerankRequest) returns (RerankResult) {}
}
message RerankRequest {
string query = 1;
repeated string documents = 2;
int32 top_n = 3;
}
message RerankResult {
Usage usage = 1;
repeated DocumentResult results = 2;
}
message Usage {
int32 total_tokens = 1;
int32 prompt_tokens = 2;
}
message DocumentResult {
int32 index = 1;
string text = 2;
float relevance_score = 3;
}
message StoresKey {
@@ -177,6 +201,7 @@ message ModelOptions {
bool EnforceEager = 52;
int32 SwapSpace = 53;
int32 MaxModelLen = 54;
int32 TensorParallelSize = 55;
string MMProj = 41;
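A hedged sketch of exercising the new Rerank RPC from Python, using stubs generated from this proto (the backend_pb2 / backend_pb2_grpc modules that the rerankers backend later in this diff also imports); the address is the backend's default bind address from reranker.py and is an assumption here.

```python
# Illustrative client only; assumes the generated backend_pb2 / backend_pb2_grpc modules
# are importable and a rerankers backend is listening on localhost:50051.
import grpc
import backend_pb2
import backend_pb2_grpc

def rerank(address: str = "localhost:50051"):
    with grpc.insecure_channel(address) as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        request = backend_pb2.RerankRequest(
            query="Organic skincare products for sensitive skin",
            documents=[
                "Eco-friendly kitchenware for modern homes",
                "Natural organic skincare range for sensitive skin",
            ],
            top_n=2,
        )
        reply = stub.Rerank(request)
        for res in reply.results:
            print(res.index, res.relevance_score, res.text)

if __name__ == "__main__":
    rerank()
```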


@@ -120,4 +120,6 @@ dependencies:
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
- rerankers[transformers]
- pydantic
prefix: /opt/conda/envs/transformers


@@ -108,4 +108,6 @@ dependencies:
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
- rerankers[transformers]
- pydantic
prefix: /opt/conda/envs/transformers


@@ -111,5 +111,7 @@ dependencies:
- vllm>=0.4.0
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
- xformers==0.0.23.post1
- rerankers[transformers]
- pydantic
prefix: /opt/conda/envs/transformers


@@ -61,4 +61,5 @@ dependencies:
- urllib3==2.0.6
- zipp==3.17.0
- torch
- opencv-python
prefix: /opt/conda/envs/diffusers


@@ -71,4 +71,5 @@ dependencies:
- typing-extensions==4.8.0
- urllib3==2.0.6
- zipp==3.17.0
- opencv-python
prefix: /opt/conda/envs/diffusers


@@ -0,0 +1,27 @@
.PHONY: rerankers
rerankers: protogen
$(MAKE) -C ../common-env/transformers
.PHONY: run
run: protogen
@echo "Running rerankers..."
bash run.sh
@echo "rerankers run."
# It does not work well from the command line. It only works with an IDE like VSCode.
.PHONY: test
test: protogen
@echo "Testing rerankers..."
bash test.sh
@echo "rerankers tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto


@@ -0,0 +1,5 @@
# Creating a separate environment for the reranker project
```
make rerankers
```


@@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""
Extra gRPC server for Rerankers models.
"""
from concurrent import futures
import argparse
import signal
import sys
import os
import time
import backend_pb2
import backend_pb2_grpc
import grpc
from rerankers import Reranker
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
A gRPC servicer for the backend service.
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Rerank.
"""
def Health(self, request, context):
"""
A gRPC method that returns the health status of the backend service.
Args:
request: A HealthRequest object that contains the request parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A Reply object that contains the health status of the backend service.
"""
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
"""
A gRPC method that loads a model into memory.
Args:
request: A LoadModelRequest object that contains the request parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A Result object that contains the result of the LoadModel operation.
"""
model_name = request.Model
try:
kwargs = {}
if request.Type != "":
kwargs['model_type'] = request.Type
if request.PipelineType != "": # Reuse the PipelineType field for language
kwargs['lang'] = request.PipelineType
self.model_name = model_name
self.model = Reranker(model_name, **kwargs)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service
# Replace this with your desired response
return backend_pb2.Result(message="Model loaded successfully", success=True)
def Rerank(self, request, context):
documents = []
for idx, doc in enumerate(request.documents):
documents.append(doc)
ranked_results=self.model.rank(query=request.query, docs=documents, doc_ids=list(range(len(request.documents))))
# Prepare results to return
results = [
backend_pb2.DocumentResult(
index=res.doc_id,
text=res.text,
relevance_score=res.score
) for res in ranked_results.results
]
# Calculate the usage and total tokens
# TODO: Implement the usage calculation with reranker
total_tokens = sum(len(doc.split()) for doc in request.documents) + len(request.query.split())
prompt_tokens = len(request.query.split())
usage = backend_pb2.Usage(total_tokens=total_tokens, prompt_tokens=prompt_tokens)
return backend_pb2.RerankResult(usage=usage, results=results)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)
# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)
# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()
serve(args.addr)
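
The servicer above is a thin wrapper over the rerankers library. Below is a standalone sketch of the same flow, useful when debugging outside gRPC; the attribute names (doc_id, text, score) are taken from the servicer code above, so treat the exact library API as an assumption for other versions.

```python
# Standalone sketch of what the servicer wraps: rank two documents with the
# rerankers library directly. Attribute names follow the servicer code above.
from rerankers import Reranker

ranker = Reranker("cross-encoder")  # optionally model_type=... and lang=...
docs = ["I hate you", "I really like you"]
ranked = ranker.rank(query="I love you", docs=docs, doc_ids=list(range(len(docs))))

for res in ranked.results:
    print(res.doc_id, res.score, res.text)
```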

backend/python/rerankers/run.sh Executable file

@@ -0,0 +1,14 @@
#!/bin/bash
##
## A bash script wrapper that runs the reranker server with conda
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
python $DIR/reranker.py $@


@@ -0,0 +1,11 @@
#!/bin/bash
##
## A bash script wrapper that runs the reranker tests with conda
# Activate conda environment
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
python -m unittest $DIR/test_reranker.py


@@ -0,0 +1,90 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc
import grpc
class TestBackendServicer(unittest.TestCase):
"""
TestBackendServicer is the class that tests the gRPC service
"""
def setUp(self):
"""
This method sets up the gRPC service by starting the server
"""
self.service = subprocess.Popen(["python3", "reranker.py", "--addr", "localhost:50051"])
time.sleep(10)
def tearDown(self) -> None:
"""
This method tears down the gRPC service by terminating the server
"""
self.service.kill()
self.service.wait()
def test_server_startup(self):
"""
This method tests if the server starts up successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.Health(backend_pb2.HealthMessage())
self.assertEqual(response.message, b'OK')
except Exception as err:
print(err)
self.fail("Server failed to start")
finally:
self.tearDown()
def test_load_model(self):
"""
This method tests if the model is loaded successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
self.assertTrue(response.success)
self.assertEqual(response.message, "Model loaded successfully")
except Exception as err:
print(err)
self.fail("LoadModel service failed")
finally:
self.tearDown()
def test_rerank(self):
"""
This method tests if documents are reranked successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
request = backend_pb2.RerankRequest(
query="I love you",
documents=["I hate you", "I really like you"],
top_n=2
)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
self.assertTrue(response.success)
rerank_response = stub.Rerank(request)
print(rerank_response.results[0])
self.assertIsNotNone(rerank_response.results)
self.assertEqual(len(rerank_response.results), 2)
self.assertEqual(rerank_response.results[0].text, "I really like you")
self.assertEqual(rerank_response.results[1].text, "I hate you")
except Exception as err:
print(err)
self.fail("Reranker service failed")
finally:
self.tearDown()


@@ -148,7 +148,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
else:
device_map="CPU"
self.model = OVModelForCausalLM.from_pretrained(model_name,
compile=True,
compile=True,
trust_remote_code=request.TrustRemoteCode,
ov_config={"PERFORMANCE_HINT": "LATENCY"},
device=device_map)
self.OV = True
else:
@@ -158,6 +160,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
quantization_config=quantization,
device_map=device_map,
torch_dtype=compute)
if request.ContextSize > 0:
self.max_tokens = request.ContextSize
else:
self.max_tokens = self.model.config.max_position_embeddings
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
self.XPU = False
@@ -212,12 +219,27 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
set_seed(request.Seed)
if request.TopP == 0:
request.TopP = 0.9
if request.TopK == 0:
request.TopK = 40
prompt = request.Prompt
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
eos_token_id = self.tokenizer.eos_token_id
if request.StopPrompts:
eos_token_id = []
for word in request.StopPrompts:
eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word))
inputs = self.tokenizer(prompt, return_tensors="pt")
max_tokens = 200
if request.Tokens > 0:
max_tokens = request.Tokens
else:
max_tokens = self.max_tokens - inputs["input_ids"].size()[inputs["input_ids"].dim()-1]
inputs = self.tokenizer(request.Prompt, return_tensors="pt")
if self.CUDA:
inputs = inputs.to("cuda")
if XPU and self.OV == False:
@@ -235,7 +257,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
top_k=request.TopK,
do_sample=True,
attention_mask=inputs["attention_mask"],
eos_token_id=self.tokenizer.eos_token_id,
eos_token_id=eos_token_id,
pad_token_id=self.tokenizer.eos_token_id,
streamer=streamer)
thread=Thread(target=self.model.generate, kwargs=config)
@@ -264,7 +286,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
top_k=request.TopK,
do_sample=True,
attention_mask=inputs["attention_mask"],
eos_token_id=self.tokenizer.eos_token_id,
eos_token_id=eos_token_id,
pad_token_id=self.tokenizer.eos_token_id)
generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
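
A small illustration of the token-budget arithmetic introduced above, with made-up numbers: a non-zero ContextSize overrides the model's max_position_embeddings, and the generation budget is either the explicit request.Tokens or whatever context remains after the prompt.

```python
# Illustration of the generation budget computed above (numbers are made up).
context_size = 0                # request.ContextSize; 0 means "use the model default"
max_position_embeddings = 4096  # what the model config would report
requested_tokens = 0            # request.Tokens; 0 means "no explicit cap"
prompt_length = 150             # inputs["input_ids"].size(-1) after tokenization

max_ctx = context_size if context_size > 0 else max_position_embeddings
max_new_tokens = requested_tokens if requested_tokens > 0 else max_ctx - prompt_length
print(max_new_tokens)  # 3946 with these numbers
```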


@@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
engine_args.trust_remote_code = request.TrustRemoteCode
if request.EnforceEager:
engine_args.enforce_eager = request.EnforceEager
if request.TensorParallelSize:
engine_args.tensor_parallel_size = request.TensorParallelSize
if request.SwapSpace != 0:
engine_args.swap_space = request.SwapSpace
if request.MaxModelLen != 0:


@@ -74,6 +74,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
EnforceEager: c.EnforceEager,
SwapSpace: int32(c.SwapSpace),
MaxModelLen: int32(c.MaxModelLen),
TensorParallelSize: int32(c.TensorParallelSize),
MMProj: c.MMProj,
YarnExtFactor: c.YarnExtFactor,
YarnAttnFactor: c.YarnAttnFactor,

core/backend/rerank.go Normal file

@@ -0,0 +1,39 @@
package backend
import (
"context"
"fmt"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
)
func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
bb := backend
if bb == "" {
return nil, fmt.Errorf("backend is required")
}
grpcOpts := gRPCModelOpts(backendConfig)
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
rerankModel, err := loader.BackendLoader(opts...)
if err != nil {
return nil, err
}
if rerankModel == nil {
return nil, fmt.Errorf("could not load rerank model")
}
res, err := rerankModel.Rerank(context.Background(), request)
return res, err
}


@@ -4,7 +4,7 @@ import "embed"
type Context struct {
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"`
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
BackendAssets embed.FS `kong:"-"`


@@ -25,7 +25,7 @@ type ModelsInstall struct {
}
type ModelsCMD struct {
List ModelsList `cmd:"" help:"List the models avaiable in your galleries" default:"withargs"`
List ModelsList `cmd:"" help:"List the models available in your galleries" default:"withargs"`
Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
}


@@ -2,30 +2,31 @@ package cli
import (
"fmt"
"os"
"strings"
"time"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http"
"github.com/go-skynet/LocalAI/core/startup"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
type RunCMD struct {
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"`
// The alias on this option is there to preserve functionality with the old `--config-file` parameter
ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"`
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
@@ -60,15 +61,16 @@ func (r *RunCMD) Run(ctx *Context) error {
config.WithYAMLConfigPreload(r.PreloadModelsConfig),
config.WithModelPath(r.ModelsPath),
config.WithContextSize(r.ContextSize),
config.WithDebug(*ctx.LogLevel == "debug"),
config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
config.WithImageDir(r.ImagePath),
config.WithAudioDir(r.AudioPath),
config.WithUploadDir(r.UploadPath),
config.WithConfigsDir(r.ConfigPath),
config.WithDynamicConfigDir(r.LocalaiConfigDir),
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
config.WithF16(r.F16),
config.WithStringGalleries(r.Galleries),
config.WithModelLibraryURL(r.RemoteLibrary),
config.WithDisableMessage(false),
config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithThreads(r.Threads),
@@ -129,22 +131,10 @@ func (r *RunCMD) Run(ctx *Context) error {
}
cl, ml, options, err := startup.Startup(opts...)
if err != nil {
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}
// Watch the configuration directory
// If the directory does not exist, we don't watch it
if _, err := os.Stat(r.LocalaiConfigDir); err == nil {
closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options)
defer closeConfigWatcherFn()
if err != nil {
return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir)
}
}
appHTTP, err := http.App(cl, ml, options)
if err != nil {
log.Error().Err(err).Msg("error during HTTP App construction")


@@ -17,11 +17,13 @@ type ApplicationConfig struct {
UploadLimitMB, Threads, ContextSize int
DisableWelcomePage bool
F16 bool
Debug, DisableMessage bool
Debug bool
ImageDir string
AudioDir string
UploadDir string
ConfigsDir string
DynamicConfigsDir string
DynamicConfigsDirPollInterval time.Duration
CORS bool
PreloadJSONModels string
PreloadModelsFromPath string
@@ -55,12 +57,11 @@ type AppOption func(*ApplicationConfig)
func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
opt := &ApplicationConfig{
Context: context.Background(),
UploadLimitMB: 15,
Threads: 1,
ContextSize: 512,
Debug: true,
DisableMessage: true,
Context: context.Background(),
UploadLimitMB: 15,
Threads: 1,
ContextSize: 512,
Debug: true,
}
for _, oo := range o {
oo(opt)
@@ -234,12 +235,6 @@ func WithDebug(debug bool) AppOption {
}
}
func WithDisableMessage(disableMessage bool) AppOption {
return func(o *ApplicationConfig) {
o.DisableMessage = disableMessage
}
}
func WithAudioDir(audioDir string) AppOption {
return func(o *ApplicationConfig) {
o.AudioDir = audioDir
@@ -264,6 +259,18 @@ func WithConfigsDir(configsDir string) AppOption {
}
}
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
return func(o *ApplicationConfig) {
o.DynamicConfigsDir = dynamicConfigsDir
}
}
func WithDynamicConfigDirPollInterval(interval time.Duration) AppOption {
return func(o *ApplicationConfig) {
o.DynamicConfigsDirPollInterval = interval
}
}
func WithApiKeys(apiKeys []string) AppOption {
return func(o *ApplicationConfig) {
o.ApiKeys = apiKeys


@@ -12,6 +12,7 @@ import (
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/downloader"
"github.com/go-skynet/LocalAI/pkg/functions"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
@@ -39,7 +40,7 @@ type BackendConfig struct {
InputToken [][]int `yaml:"-"`
functionCallString, functionCallNameString string `yaml:"-"`
FunctionsConfig Functions `yaml:"function"`
FunctionsConfig functions.FunctionsConfig `yaml:"function"`
FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
@@ -139,6 +140,7 @@ type LLMConfig struct {
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
RopeScaling string `yaml:"rope_scaling"`
@@ -157,13 +159,6 @@ type AutoGPTQ struct {
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
}
type Functions struct {
DisableNoAction bool `yaml:"disable_no_action"`
NoActionFunctionName string `yaml:"no_action_function_name"`
NoActionDescriptionName string `yaml:"no_action_description_name"`
ParallelCalls bool `yaml:"parallel_calls"`
}
type TemplateConfig struct {
Chat string `yaml:"chat"`
ChatMessage string `yaml:"chat_message"`
@@ -210,15 +205,15 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
defaultTopP := 0.95
defaultTopK := 40
defaultTemp := 0.9
defaultMaxTokens := 2048
defaultMirostat := 2
defaultMirostatTAU := 5.0
defaultMirostatETA := 0.1
defaultTypicalP := 1.0
defaultTFZ := 1.0
defaultZero := 0
// Try to offload all GPU layers (if GPU is found)
defaultNGPULayers := 99999999
defaultHigh := 99999999
trueV := true
falseV := false
@@ -259,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
}
if cfg.Maxtokens == nil {
cfg.Maxtokens = &defaultMaxTokens
cfg.Maxtokens = &defaultZero
}
if cfg.Mirostat == nil {
@@ -274,7 +269,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.MirostatTAU = &defaultMirostatTAU
}
if cfg.NGPULayers == nil {
cfg.NGPULayers = &defaultNGPULayers
cfg.NGPULayers = &defaultHigh
}
if cfg.LowVRAM == nil {
@@ -517,7 +512,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
for i, config := range cl.configs {
// Download files and verify their SHA
for _, file := range config.DownloadFiles {
for i, file := range config.DownloadFiles {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
@@ -526,7 +521,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
// Create file path
filePath := filepath.Join(modelPath, file.Filename)
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
return err
}
}
@@ -540,7 +535,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", 0, 0, status)
if err != nil {
return err
}


@@ -1,309 +0,0 @@
package http
import (
"encoding/json"
"errors"
"os"
"strings"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/swagger" // swagger handler
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
)
func readAuthHeader(c *fiber.Ctx) string {
authHeader := c.Get("Authorization")
// elevenlabs
xApiKey := c.Get("xi-api-key")
if xApiKey != "" {
authHeader = "Bearer " + xApiKey
}
// anthropic
xApiKey = c.Get("x-api-key")
if xApiKey != "" {
authHeader = "Bearer " + xApiKey
}
return authHeader
}
// @title LocalAI API
// @version 2.0.0
// @description The LocalAI Rest API.
// @termsOfService
// @contact.name LocalAI
// @contact.url https://localai.io
// @license.name MIT
// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
// @BasePath /
// @securityDefinitions.apikey BearerAuth
// @in header
// @name Authorization
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
// Return errors as JSON responses
app := fiber.New(fiber.Config{
Views: renderEngine(),
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
DisableStartupMessage: appConfig.DisableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
code := fiber.StatusInternalServerError
// Retrieve the custom status code if it's a *fiber.Error
var e *fiber.Error
if errors.As(err, &e) {
code = e.Code
}
// Send custom error page
return ctx.Status(code).JSON(
schema.ErrorResponse{
Error: &schema.APIError{Message: err.Error(), Code: code},
},
)
},
})
if appConfig.Debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
}
// Default middleware config
if !appConfig.Debug {
app.Use(recover.New())
}
metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}
if metricsService != nil {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
auth := func(c *fiber.Ctx) error {
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
// Check for api_keys.json file
fileContent, err := os.ReadFile("api_keys.json")
if err == nil {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
}
// Add file keys to options.ApiKeys
appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
}
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
authHeader := readAuthHeader(c)
if authHeader == "" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
}
// If it's a bearer token
authHeaderParts := strings.Split(authHeader, " ")
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
}
apiKey := authHeaderParts[1]
for _, key := range appConfig.ApiKeys {
if apiKey == key {
return c.Next()
}
}
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
}
if appConfig.CORS {
var c func(ctx *fiber.Ctx) error
if appConfig.CORSAllowOrigins == "" {
c = cors.New()
} else {
c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
}
app.Use(c)
}
// LocalAI API endpoints
galleryService := services.NewGalleryService(appConfig.ModelPath)
galleryService.Start(appConfig.Context, cl)
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`
}{Version: internal.PrintableVersion()})
})
// Make sure directories exists
os.MkdirAll(appConfig.ImageDir, 0755)
os.MkdirAll(appConfig.AudioDir, 0755)
os.MkdirAll(appConfig.UploadDir, 0755)
os.MkdirAll(appConfig.ConfigsDir, 0755)
os.MkdirAll(appConfig.ModelPath, 0755)
// Load config jsons
utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
app.Get("/swagger/*", swagger.HandlerDefault) // default
welcomeRoute(
app,
cl,
ml,
appConfig,
auth,
)
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
// Elevenlabs
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
// Stores
sl := model.NewModelLoader("")
app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
// openAI compatible API endpoint
// chat
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
// edit
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
// assistant
app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
// files
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
// completion
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
// embeddings
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
// audio
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
// images
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
if appConfig.ImageDir != "" {
app.Static("/generated-images", appConfig.ImageDir)
}
if appConfig.AudioDir != "" {
app.Static("/generated-audio", appConfig.AudioDir)
}
ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}
// Kubernetes health checks
app.Get("/healthz", ok)
app.Get("/readyz", ok)
// Experimental Backend Statistics Module
backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
// models
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
// Define a custom 404 handler
// Note: keep this at the bottom!
app.Use(notFoundHandler)
return app, nil
}

core/http/app.go Normal file

@@ -0,0 +1,204 @@
package http
import (
"encoding/json"
"errors"
"os"
"strings"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
"github.com/go-skynet/LocalAI/core/http/routes"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/contrib/fiberzerolog"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/recover"
// swagger handler
"github.com/rs/zerolog/log"
)
func readAuthHeader(c *fiber.Ctx) string {
authHeader := c.Get("Authorization")
// elevenlabs
xApiKey := c.Get("xi-api-key")
if xApiKey != "" {
authHeader = "Bearer " + xApiKey
}
// anthropic
xApiKey = c.Get("x-api-key")
if xApiKey != "" {
authHeader = "Bearer " + xApiKey
}
return authHeader
}
// @title LocalAI API
// @version 2.0.0
// @description The LocalAI Rest API.
// @termsOfService
// @contact.name LocalAI
// @contact.url https://localai.io
// @license.name MIT
// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
// @BasePath /
// @securityDefinitions.apikey BearerAuth
// @in header
// @name Authorization
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
// Return errors as JSON responses
app := fiber.New(fiber.Config{
Views: renderEngine(),
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
// We disable the Fiber startup message as it does not conform to structured logging.
// We register a startup log line with connection information in the OnListen hook to keep things user friendly though
DisableStartupMessage: true,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
code := fiber.StatusInternalServerError
// Retrieve the custom status code if it's a *fiber.Error
var e *fiber.Error
if errors.As(err, &e) {
code = e.Code
}
// Send custom error page
return ctx.Status(code).JSON(
schema.ErrorResponse{
Error: &schema.APIError{Message: err.Error(), Code: code},
},
)
},
})
app.Hooks().OnListen(func(listenData fiber.ListenData) error {
scheme := "http"
if listenData.TLS {
scheme = "https"
}
log.Info().Str("endpoint", scheme+"://"+listenData.Host+":"+listenData.Port).Msg("LocalAI API is listening! Please connect to the endpoint for API documentation.")
return nil
})
// Have Fiber use zerolog like the rest of the application rather than its built-in logger
logger := log.Logger
app.Use(fiberzerolog.New(fiberzerolog.Config{
Logger: &logger,
}))
// Default middleware config
if !appConfig.Debug {
app.Use(recover.New())
}
metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}
if metricsService != nil {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
auth := func(c *fiber.Ctx) error {
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
// Check for api_keys.json file
fileContent, err := os.ReadFile("api_keys.json")
if err == nil {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
}
// Add file keys to options.ApiKeys
appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
}
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
authHeader := readAuthHeader(c)
if authHeader == "" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
}
// If it's a bearer token
authHeaderParts := strings.Split(authHeader, " ")
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
}
apiKey := authHeaderParts[1]
for _, key := range appConfig.ApiKeys {
if apiKey == key {
return c.Next()
}
}
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
}
if appConfig.CORS {
var c func(ctx *fiber.Ctx) error
if appConfig.CORSAllowOrigins == "" {
c = cors.New()
} else {
c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
}
app.Use(c)
}
// Make sure directories exists
os.MkdirAll(appConfig.ImageDir, 0755)
os.MkdirAll(appConfig.AudioDir, 0755)
os.MkdirAll(appConfig.UploadDir, 0755)
os.MkdirAll(appConfig.ConfigsDir, 0755)
os.MkdirAll(appConfig.ModelPath, 0755)
// Load config jsons
utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
galleryService := services.NewGalleryService(appConfig.ModelPath)
galleryService.Start(appConfig.Context, cl)
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth)
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)
// Define a custom 404 handler
// Note: keep this at the bottom!
app.Use(notFoundHandler)
return app, nil
}


@@ -211,7 +211,6 @@ var _ = Describe("API test", func() {
commonOpts := []config.AppOption{
config.WithDebug(true),
config.WithDisableMessage(true),
}
Context("API with ephemeral models", func() {
@@ -490,11 +489,10 @@ var _ = Describe("API test", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
modelName := "codellama"
modelName := "hermes-2-pro-mistral"
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
Name: modelName,
Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -557,7 +555,7 @@ var _ = Describe("API test", func() {
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})


@@ -0,0 +1,247 @@
package elements
import (
"fmt"
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/go-skynet/LocalAI/pkg/gallery"
)
const (
NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
)
func DoneProgress(uid string) string {
return elem.Div(
attrs.Props{},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text("Installation completed"),
),
).Render()
}
func ErrorProgress(err string) string {
return elem.Div(
attrs.Props{},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text("Error"+err),
),
).Render()
}
func ProgressBar(progress string) string {
return elem.Div(attrs.Props{
"class": "progress",
"role": "progressbar",
"aria-valuemin": "0",
"aria-valuemax": "100",
"aria-valuenow": "0",
"aria-labelledby": "pblabel",
},
elem.Div(attrs.Props{
"id": "pb",
"class": "progress-bar",
"style": "width:" + progress + "%",
}),
).Render()
}
func StartProgressBar(uid, progress string) string {
if progress == "" {
progress = "0"
}
return elem.Div(attrs.Props{
"hx-trigger": "done",
"hx-get": "/browse/job/" + uid,
"hx-swap": "outerHTML",
"hx-target": "this",
},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text("Installing"),
// This is a simple example of how to use the HTMX library to create a progress bar that updates every 600ms.
elem.Div(attrs.Props{
"hx-get": "/browse/job/progress/" + uid,
"hx-trigger": "every 600ms",
"hx-target": "this",
"hx-swap": "innerHTML",
},
elem.Raw(ProgressBar(progress)),
),
),
).Render()
}
func cardSpan(text, icon string) elem.Node {
return elem.Span(
attrs.Props{
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
},
elem.I(attrs.Props{
"class": icon + " pr-2",
}),
elem.Text(text),
)
}
func ListModels(models []*gallery.GalleryModel) string {
modelsElements := []elem.Node{}
span := func(s string) elem.Node {
return elem.Span(
attrs.Props{
"class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs",
},
elem.Text(s),
)
}
installButton := func(m *gallery.GalleryModel) elem.Node {
return elem.Button(
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
// post the Model ID as param
"hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name),
},
elem.I(
attrs.Props{
"class": "fa-solid fa-download pr-2",
},
),
elem.Text("Install"),
)
}
descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
return elem.Div(
attrs.Props{
"class": "p-6 text-surface dark:text-white",
},
elem.H5(
attrs.Props{
"class": "mb-2 text-xl font-medium leading-tight",
},
elem.Text(m.Name),
),
elem.P(
attrs.Props{
"class": "mb-4 text-base",
},
elem.Text(m.Description),
),
)
}
actionDiv := func(m *gallery.GalleryModel) elem.Node {
nodes := []elem.Node{
cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
}
if m.License != "" {
nodes = append(nodes,
cardSpan("License: "+m.License, "fas fa-book"),
)
}
for _, tag := range m.Tags {
nodes = append(nodes,
cardSpan(tag, "fas fa-tag"),
)
}
for i, url := range m.URLs {
nodes = append(nodes,
elem.A(
attrs.Props{
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
"href": url,
"target": "_blank",
},
elem.I(attrs.Props{
"class": "fas fa-link pr-2",
}),
elem.Text("Link #"+fmt.Sprintf("%d", i+1)),
))
}
return elem.Div(
attrs.Props{
"class": "px-6 pt-4 pb-2",
},
elem.P(
attrs.Props{
"class": "mb-4 text-base",
},
nodes...,
),
elem.If(m.Installed, span("Installed"), installButton(m)),
)
}
for _, m := range models {
elems := []elem.Node{}
if m.Icon == "" {
m.Icon = NoImage
}
elems = append(elems,
elem.Div(attrs.Props{
"class": "flex justify-center items-center",
},
elem.A(attrs.Props{
"href": "#!",
// "class": "justify-center items-center",
},
elem.Img(attrs.Props{
// "class": "rounded-t-lg object-fit object-center h-96",
"class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3",
"src": m.Icon,
}),
),
))
elems = append(elems, descriptionDiv(m), actionDiv(m))
modelsElements = append(modelsElements,
elem.Div(
attrs.Props{
"class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2",
},
elem.Div(
attrs.Props{
// "class": "p-6",
},
elems...,
),
),
)
}
wrapper := elem.Div(attrs.Props{
"class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark",
//"class": "block rounded-lg bg-white shadow-secondary-1 dark:bg-surface-dark",
}, modelsElements...)
return wrapper.Render()
}


@@ -0,0 +1,84 @@
package jina
import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
req := new(schema.JINARerankRequest)
if err := c.BodyParser(req); err != nil {
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
"error": "Cannot parse JSON",
})
}
input := new(schema.TTSRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
if input.Backend != "" {
cfg.Backend = input.Backend
}
request := &proto.RerankRequest{
Query: req.Query,
TopN: int32(req.TopN),
Documents: req.Documents,
}
results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
if err != nil {
return err
}
response := &schema.JINARerankResponse{
Model: req.Model,
}
for _, r := range results.Results {
response.Results = append(response.Results, schema.JINADocumentResult{
Index: int(r.Index),
Document: schema.JINAText{Text: r.Text},
RelevanceScore: float64(r.RelevanceScore),
})
}
response.Usage.TotalTokens = int(results.Usage.TotalTokens)
response.Usage.PromptTokens = int(results.Usage.PromptTokens)
return c.Status(fiber.StatusOK).JSON(response)
}
}
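
Below is a hedged sketch of calling the new Jina-compatible rerank endpoint over HTTP. The /v1/rerank path, port 8080 and the snake_case JSON keys follow Jina's public rerank API and LocalAI defaults; the exact route registered by RegisterJINARoutes and the JSON tags of schema.JINARerankResponse are not shown in this diff, so treat them as assumptions.

```python
# Hedged sketch: POST a Jina-style rerank request to LocalAI. Path, port and
# JSON key names are assumptions based on Jina's API and LocalAI defaults.
import requests

payload = {
    "model": "cross-encoder",
    "query": "I love you",
    "documents": ["I hate you", "I really like you"],
    "top_n": 2,
}
resp = requests.post("http://localhost:8080/v1/rerank", json=payload, timeout=60)
resp.raise_for_status()
for item in resp.json().get("results", []):
    print(item["index"], item["relevance_score"], item["document"]["text"])
```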


@@ -0,0 +1,32 @@
package localai
import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
cl *config.BackendConfigLoader, ml *model.ModelLoader) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, _ := ml.ListModels()
backendConfigs := cl.GetAllBackendConfigs()
summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"Models": models,
"ModelsConfig": backendConfigs,
"ApplicationConfig": appConfig,
}
if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
// The client expects a JSON response
return c.Status(fiber.StatusOK).JSON(summary)
} else {
// Render index
return c.Render("views/index", summary)
}
}
}


@@ -455,21 +455,19 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
for i, assistant := range Assistants {
if assistant.ID == assistantID {
for j, fileId := range assistant.FileIDs {
if fileId == fileId {
Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
// Check if the file exists in the assistantFiles slice
for i, assistantFile := range AssistantFiles {
if assistantFile.ID == fileId {
// Remove the file from the assistantFiles slice
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: true,
})
}
// Check if the file exists in the assistantFiles slice
for i, assistantFile := range AssistantFiles {
if assistantFile.ID == fileId {
// Remove the file from the assistantFiles slice
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: true,
})
}
}
}


@@ -11,9 +11,8 @@ import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
"github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
@@ -68,8 +67,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
return true
})
results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls)
noActionToRun := len(results) > 0 && results[0].name == noAction
results := functions.ParseFunctionCall(result, config.FunctionsConfig)
noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
switch {
case noActionToRun:
@@ -82,7 +81,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
responses <- initialMessage
result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt)
result, err := handleQuestion(config, req, ml, startupOptions, results, prompt)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -105,7 +104,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
default:
for i, ss := range results {
name, args := ss.name, ss.arguments
name, args := ss.Name, ss.Arguments
initialMessage := schema.OpenAIResponse{
ID: id,
@@ -156,8 +155,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
return func(c *fiber.Ctx) error {
processFunctions := false
funcs := grammar.Functions{}
modelFile, input, err := readRequest(c, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
@@ -169,6 +166,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
log.Debug().Msgf("Configuration read: %+v", config)
funcs := input.Functions
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
// Allow the user to set custom actions via config file
// to be "embedded" in each model
noActionName := "answer"
@@ -182,18 +182,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
if input.ResponseFormat.Type == "json_object" {
input.Grammar = grammar.JSONBNF
input.Grammar = functions.JSONBNF
}
config.Grammar = input.Grammar
// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
if shouldUseFn {
log.Debug().Msgf("Response needs to process functions")
}
processFunctions = true
noActionGrammar := grammar.Function{
switch {
case !config.FunctionsConfig.NoGrammar && shouldUseFn:
noActionGrammar := functions.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
@@ -206,7 +206,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
// Append the no action function
funcs = append(funcs, input.Functions...)
if !config.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
@@ -219,10 +218,17 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Update input grammar
jsStruct := funcs.ToJSONStructure()
config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
} else if input.JSONFunctionGrammarObject != nil {
case input.JSONFunctionGrammarObject != nil:
config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls)
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {
funcs = funcs.Select(config.FunctionToCall())
}
}
// process functions if we have any defined or if we have a function call string
// functions are not supported in stream mode (yet?)
toStream := input.Stream
@@ -232,8 +238,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// If we are using the tokenizer template, we don't need to process the messages
// unless we are processing functions
if !config.TemplateConfig.UseTokenizerTemplate || processFunctions {
if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range input.Messages {
@@ -346,11 +351,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
templateFile = config.Model
}
if config.TemplateConfig.Chat != "" && !processFunctions {
if config.TemplateConfig.Chat != "" && !shouldUseFn {
templateFile = config.TemplateConfig.Chat
}
if config.TemplateConfig.Functions != "" && processFunctions {
if config.TemplateConfig.Functions != "" && shouldUseFn {
templateFile = config.TemplateConfig.Functions
}
@@ -370,7 +375,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
log.Debug().Msgf("Prompt (after templating): %s", predInput)
if processFunctions {
if shouldUseFn && config.Grammar != "" {
log.Debug().Msgf("Grammar: %+v", config.Grammar)
}
}
@@ -388,7 +393,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
responses := make(chan schema.OpenAIResponse)
if !processFunctions {
if !shouldUseFn {
go process(predInput, input, config, ml, responses)
} else {
go processTools(noActionName, predInput, input, config, ml, responses)
@@ -446,18 +451,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// no streaming mode
default:
result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
if !processFunctions {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}
results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls)
noActionsToRun := len(results) > 0 && results[0].name == noActionName
results := functions.ParseFunctionCall(s, config.FunctionsConfig)
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
switch {
case noActionsToRun:
result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput)
result, err := handleQuestion(config, input, ml, startupOptions, results, predInput)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -476,7 +481,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
for _, ss := range results {
name, args := ss.name, ss.arguments
name, args := ss.Name, ss.Arguments
if len(input.Tools) > 0 {
// If we are using tools, we condense the function calls into
// a single response choice with all the tools
@@ -534,16 +539,20 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Return the prediction in the response body
return c.JSON(resp)
}
}
}
func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) {
func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, prompt string) (string, error) {
log.Debug().Msgf("nothing to do, computing a reply")
arg := ""
if len(funcResults) > 0 {
arg = funcResults[0].Arguments
}
// If there is a message that the LLM already sends as part of the JSON reply, use it
arguments := map[string]interface{}{}
json.Unmarshal([]byte(args), &arguments)
if err := json.Unmarshal([]byte(arg), &arguments); err != nil {
log.Debug().Msg("handleQuestion: function result did not contain a valid JSON object")
}
m, exists := arguments["message"]
if exists {
switch message := m.(type) {
@@ -580,63 +589,3 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
}
return backend.Finetune(*config, prompt, prediction.Response), nil
}
type funcCallResults struct {
name string
arguments string
}
func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
results := []funcCallResults{}
// TODO: use generics to avoid this code duplication
if multipleResults {
ss := []map[string]interface{}{}
s := utils.EscapeNewLines(llmresult)
json.Unmarshal([]byte(s), &ss)
log.Debug().Msgf("Function return: %s %+v", s, ss)
for _, s := range ss {
func_name, ok := s["function"]
if !ok {
continue
}
args, ok := s["arguments"]
if !ok {
continue
}
d, _ := json.Marshal(args)
funcName, ok := func_name.(string)
if !ok {
continue
}
results = append(results, funcCallResults{name: funcName, arguments: string(d)})
}
} else {
// As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
ss := map[string]interface{}{}
// This prevent newlines to break JSON parsing for clients
s := utils.EscapeNewLines(llmresult)
json.Unmarshal([]byte(s), &ss)
log.Debug().Msgf("Function return: %s %+v", s, ss)
// The grammar defines the function name as "function", while OpenAI returns "name"
func_name, ok := ss["function"]
if !ok {
return results
}
// Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
if !ok {
return results
}
d, _ := json.Marshal(args)
funcName, ok := func_name.(string)
if !ok {
return results
}
results = append(results, funcCallResults{name: funcName, arguments: string(d)})
}
return results
}


@@ -12,7 +12,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
"github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
@@ -70,7 +70,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
}
if input.ResponseFormat.Type == "json_object" {
input.Grammar = grammar.JSONBNF
input.Grammar = functions.JSONBNF
}
config.Grammar = input.Grammar


@@ -12,7 +12,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
"github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
@@ -145,7 +145,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
}
if input.ToolsChoice != nil {
var toolChoice grammar.Tool
var toolChoice functions.Tool
switch content := input.ToolsChoice.(type) {
case string:


@@ -7,10 +7,7 @@ import (
"net/http"
"github.com/Masterminds/sprig/v3"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
fiberhtml "github.com/gofiber/template/html/v2"
"github.com/russross/blackfriday"
@@ -33,40 +30,6 @@ func notFoundHandler(c *fiber.Ctx) error {
return nil
}
func welcomeRoute(
app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error,
) {
if appConfig.DisableWelcomePage {
return
}
models, _ := ml.ListModels()
backendConfigs := cl.GetAllBackendConfigs()
app.Get("/", auth, func(c *fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"Models": models,
"ModelsConfig": backendConfigs,
"ApplicationConfig": appConfig,
}
if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
// The client expects a JSON response
return c.Status(fiber.StatusOK).JSON(summary)
} else {
// Render index
return c.Render("views/index", summary)
}
})
}
func renderEngine() *fiberhtml.Engine {
engine := fiberhtml.NewFileSystem(http.FS(viewsfs), ".html")
engine.AddFuncMap(sprig.FuncMap())


@@ -0,0 +1,19 @@
package routes
import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)
func RegisterElevenLabsRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
// Elevenlabs
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
}

19
core/http/routes/jina.go Normal file

@@ -0,0 +1,19 @@
package routes
import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/jina"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)
func RegisterJINARoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
// POST endpoint to mimic the reranking
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
}


@@ -0,0 +1,63 @@
package routes
import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/swagger"
)
func RegisterLocalAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {
app.Get("/swagger/*", swagger.HandlerDefault) // default
// LocalAI API endpoints
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
// Stores
sl := model.NewModelLoader("")
app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
// Kubernetes health checks
ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}
app.Get("/healthz", ok)
app.Get("/readyz", ok)
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
// Experimental Backend Statistics Module
backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`
}{Version: internal.PrintableVersion()})
})
}


@@ -0,0 +1,86 @@
package routes
import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)
func RegisterOpenAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
// openAI compatible API endpoint
// chat
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
// edit
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
// assistant
app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
// files
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
// completion
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
// embeddings
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
// audio
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
// images
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
if appConfig.ImageDir != "" {
app.Static("/generated-images", appConfig.ImageDir)
}
if appConfig.AudioDir != "" {
app.Static("/generated-audio", appConfig.AudioDir)
}
// models
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
}

111
core/http/routes/ui.go Normal file

@@ -0,0 +1,111 @@
package routes
import (
"fmt"
"html/template"
"strings"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/elements"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
)
func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {
// Show the Models page
app.Get("/browse", auth, func(c *fiber.Ctx) error {
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
summary := fiber.Map{
"Title": "LocalAI - Models",
"Models": template.HTML(elements.ListModels(models)),
"Repositories": appConfig.Galleries,
// "ApplicationConfig": appConfig,
}
// Render index
return c.Render("views/models", summary)
})
// HTMX: return the model details
// https://htmx.org/examples/active-search/
app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
form := struct {
Search string `form:"search"`
}{}
if err := c.BodyParser(&form); err != nil {
return c.Status(fiber.StatusBadRequest).SendString(err.Error())
}
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
filteredModels := []*gallery.GalleryModel{}
for _, m := range models {
if strings.Contains(m.Name, form.Search) ||
strings.Contains(m.Description, form.Search) ||
strings.Contains(m.Gallery.Name, form.Search) ||
strings.Contains(strings.Join(m.Tags, ","), form.Search) {
filteredModels = append(filteredModels, m)
}
}
return c.SendString(elements.ListModels(filteredModels))
})
// https://htmx.org/examples/progress-bar/
app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
galleryID := strings.Clone(c.Params("id")) // strings.Clone is required!
id, err := uuid.NewUUID()
if err != nil {
return err
}
uid := id.String()
op := gallery.GalleryOp{
Id: uid,
GalleryName: galleryID,
Galleries: appConfig.Galleries,
}
go func() {
galleryService.C <- op
}()
return c.SendString(elements.StartProgressBar(uid, "0"))
})
// https://htmx.org/examples/progress-bar/
app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
jobUID := c.Params("uid")
status := galleryService.GetStatus(jobUID)
if status == nil {
//fmt.Errorf("could not find any status for ID")
return c.SendString(elements.ProgressBar("0"))
}
if status.Progress == 100 {
c.Set("HX-Trigger", "done")
return c.SendString(elements.ProgressBar("100"))
}
if status.Error != nil {
return c.SendString(elements.ErrorProgress(status.Error.Error()))
}
return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress)))
})
app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.DoneProgress(c.Params("uid")))
})
}


@@ -0,0 +1,19 @@
package routes
import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)
func RegisterPagesRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
if !appConfig.DisableWelcomePage {
app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml))
}
}


@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen">
{{template "views/partials/navbar" .}}
<div class="container mx-auto px-4 flex-grow">
<div class="models mt-12">
<h2 class="text-center text-3xl font-semibold text-gray-100">
🖼️ Available models from <i>{{ len .Repositories }}</i> repositories <a href="https://localai.io/models/" target="_blank" >
<i class="fas fa-circle-info pr-2"></i>
</a></h2>
<span class="htmx-indicator loader"></span>
<input class="form-control appearance-none block w-full px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
name="search" placeholder="Begin Typing To Search models..."
hx-post="/browse/search/models"
hx-trigger="input changed delay:500ms, search"
hx-target="#search-results"
hx-indicator=".htmx-indicator">
<div id="search-results">{{.Models}}</div>
</div>
</div>
{{template "views/partials/footer" .}}
</div>
</body>
</html>


@@ -3,11 +3,76 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{.Title}}</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
<link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
<link
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
rel="stylesheet" />
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
<script src="https://cdn.tailwindcss.com/3.3.0"></script>
<script>
tailwind.config = {
darkMode: "class",
theme: {
fontFamily: {
sans: ["Roboto", "sans-serif"],
body: ["Roboto", "sans-serif"],
mono: ["ui-monospace", "monospace"],
},
},
corePlugins: {
preflight: false,
},
};
</script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
<style>
body {
font-family: 'Inter', sans-serif;
}
/* Loader (https://cssloaders.github.io/) */
.loader {
width: 12px;
height: 12px;
border-radius: 50%;
display: block;
margin:15px auto;
position: relative;
color: #FFF;
box-sizing: border-box;
animation: animloader 2s linear infinite;
}
@keyframes animloader {
0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
25% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 2px; }
50% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 2px, -38px 0 0 -2px; }
75% { box-shadow: 14px 0 0 2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
100% { box-shadow: 14px 0 0 -2px, 38px 0 0 2px, -14px 0 0 -2px, -38px 0 0 -2px; }
}
.progress {
height: 20px;
margin-bottom: 20px;
overflow: hidden;
background-color: #f5f5f5;
border-radius: 4px;
box-shadow: inset 0 1px 2px rgba(0,0,0,.1);
}
.progress-bar {
float: left;
width: 0%;
height: 100%;
font-size: 12px;
line-height: 20px;
color: #fff;
text-align: center;
background-color: #337ab7;
-webkit-box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
-webkit-transition: width .6s ease;
-o-transition: width .6s ease;
transition: width .6s ease;
}
</style>
</head>


@@ -9,6 +9,7 @@
<div>
<a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
<a href="/browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>

34
core/schema/jina.go Normal file

@@ -0,0 +1,34 @@
package schema
// RerankRequest defines the structure of the request payload
type JINARerankRequest struct {
Model string `json:"model"`
Query string `json:"query"`
Documents []string `json:"documents"`
TopN int `json:"top_n"`
}
// DocumentResult represents a single document result
type JINADocumentResult struct {
Index int `json:"index"`
Document JINAText `json:"document"`
RelevanceScore float64 `json:"relevance_score"`
}
// Text holds the text of the document
type JINAText struct {
Text string `json:"text"`
}
// RerankResponse defines the structure of the response payload
type JINARerankResponse struct {
Model string `json:"model"`
Usage JINAUsageInfo `json:"usage"`
Results []JINADocumentResult `json:"results"`
}
// UsageInfo holds information about usage of tokens
type JINAUsageInfo struct {
TotalTokens int `json:"total_tokens"`
PromptTokens int `json:"prompt_tokens"`
}
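The structs above define the Jina-compatible request and response payloads for the new rerank API, which `RegisterJINARoutes` exposes at `POST /v1/rerank`. As a rough illustration only (not part of this changeset — the host, port, and model name are assumptions; use whatever reranker model you have configured), a Go client could call it like this:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Request/response shapes mirror the JINARerank* structs above.
type rerankRequest struct {
	Model     string   `json:"model"`
	Query     string   `json:"query"`
	Documents []string `json:"documents"`
	TopN      int      `json:"top_n"`
}

type rerankDocument struct {
	Text string `json:"text"`
}

type rerankResult struct {
	Index          int            `json:"index"`
	Document       rerankDocument `json:"document"`
	RelevanceScore float64        `json:"relevance_score"`
}

type rerankResponse struct {
	Model   string         `json:"model"`
	Results []rerankResult `json:"results"`
}

func main() {
	body, _ := json.Marshal(rerankRequest{
		Model:     "your-reranker-model", // placeholder: the model name you configured
		Query:     "Organic skincare for sensitive skin",
		Documents: []string{"Eco-friendly kitchenware", "Natural organic skincare range"},
		TopN:      2,
	})

	// Assumes LocalAI is listening on localhost:8080.
	resp, err := http.Post("http://localhost:8080/v1/rerank", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out rerankResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	for _, r := range out.Results {
		fmt.Printf("doc %d (score %.3f): %s\n", r.Index, r.RelevanceScore, r.Document.Text)
	}
}
```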


@@ -3,7 +3,7 @@ package schema
import (
"context"
"github.com/go-skynet/LocalAI/pkg/grammar"
functions "github.com/go-skynet/LocalAI/pkg/functions"
)
// APIError provides error information returned by the OpenAI API.
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct {
type OpenAIRequest struct {
PredictionOptions
Context context.Context `json:"-"`
Context context.Context `json:"-"`
Cancel context.CancelFunc `json:"-"`
// whisper
@@ -130,11 +130,11 @@ type OpenAIRequest struct {
Messages []Message `json:"messages" yaml:"messages"`
// A list of available functions to call
Functions []grammar.Function `json:"functions" yaml:"functions"`
FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object
Functions functions.Functions `json:"functions" yaml:"functions"`
FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object
Tools []grammar.Tool `json:"tools,omitempty" yaml:"tools"`
ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"`
Tools []functions.Tool `json:"tools,omitempty" yaml:"tools"`
ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"`
Stream bool `json:"stream"`
@@ -145,7 +145,7 @@ type OpenAIRequest struct {
// A grammar to constrain the LLM output
Grammar string `json:"grammar" yaml:"grammar"`
JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
Backend string `json:"backend" yaml:"backend"`


@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path"
"time"
"github.com/fsnotify/fsnotify"
"github.com/go-skynet/LocalAI/core/config"
@@ -12,89 +13,157 @@ import (
"github.com/rs/zerolog/log"
)
type WatchConfigDirectoryCloser func() error
type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
func ReadApiKeysJson(configDir string, appConfig *config.ApplicationConfig) error {
fileContent, err := os.ReadFile(path.Join(configDir, "api_keys.json"))
if err == nil {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err == nil {
appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
return nil
}
return err
}
return err
type configFileHandler struct {
handlers map[string]fileHandler
watcher *fsnotify.Watcher
configDir string
appConfig *config.ApplicationConfig
}
func ReadExternalBackendsJson(configDir string, appConfig *config.ApplicationConfig) error {
fileContent, err := os.ReadFile(path.Join(configDir, "external_backends.json"))
if err != nil {
return err
// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
// then we can export it to other packages
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
c := configFileHandler{
handlers: make(map[string]fileHandler),
configDir: appConfig.DynamicConfigsDir,
appConfig: appConfig,
}
// Parse JSON content from the file
var fileBackends map[string]string
err = json.Unmarshal(fileContent, &fileBackends)
if err != nil {
return err
c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
return c
}
func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
_, ok := c.handlers[filename]
if ok {
return fmt.Errorf("handler already registered for file %s", filename)
}
err = mergo.Merge(&appConfig.ExternalGRPCBackends, fileBackends)
if err != nil {
return err
c.handlers[filename] = handler
if runNow {
c.callHandler(path.Join(c.appConfig.DynamicConfigsDir, filename), handler)
}
return nil
}
var CONFIG_FILE_UPDATES = map[string]func(configDir string, appConfig *config.ApplicationConfig) error{
"api_keys.json": ReadApiKeysJson,
"external_backends.json": ReadExternalBackendsJson,
func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
fileContent, err := os.ReadFile(filename)
if err != nil && !os.IsNotExist(err) {
log.Error().Err(err).Str("filename", filename).Msg("could not read file")
}
if err = handler(fileContent, c.appConfig); err != nil {
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
}
}
func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig) (WatchConfigDirectoryCloser, error) {
if len(configDir) == 0 {
return nil, fmt.Errorf("configDir blank")
}
func (c *configFileHandler) Watch() error {
configWatcher, err := fsnotify.NewWatcher()
c.watcher = configWatcher
if err != nil {
log.Fatal().Msgf("Unable to create a watcher for the LocalAI Configuration Directory: %+v", err)
log.Fatal().Err(err).Str("configdir", c.configDir).Msg("unable to create a watcher for configuration directory")
}
ret := func() error {
configWatcher.Close()
return nil
if c.appConfig.DynamicConfigsDirPollInterval > 0 {
log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
go func() {
for {
<-ticker.C
for file, handler := range c.handlers {
log.Debug().Str("file", file).Msg("polling config file")
c.callHandler(file, handler)
}
}
}()
}
// Start listening for events.
go func() {
for {
select {
case event, ok := <-configWatcher.Events:
case event, ok := <-c.watcher.Events:
if !ok {
return
}
if event.Has(fsnotify.Write) {
for targetName, watchFn := range CONFIG_FILE_UPDATES {
if event.Name == targetName {
err := watchFn(configDir, appConfig)
log.Warn().Msgf("WatchConfigDirectory goroutine for %s: failed to update options: %+v", targetName, err)
}
if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
handler, ok := c.handlers[path.Base(event.Name)]
if !ok {
continue
}
c.callHandler(event.Name, handler)
}
case _, ok := <-configWatcher.Errors:
case err, ok := <-c.watcher.Errors:
log.Error().Err(err).Msg("config watcher error received")
if !ok {
return
}
log.Error().Err(err).Msg("error encountered while watching config directory")
}
}
}()
// Add a path.
err = configWatcher.Add(configDir)
err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
if err != nil {
return ret, fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
return fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
}
return ret, nil
return nil
}
// TODO: When we institute graceful shutdown, this should be called
func (c *configFileHandler) Stop() {
c.watcher.Close()
}
func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing api_keys.json")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return err
}
appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
} else {
appConfig.ApiKeys = startupAppConfig.ApiKeys
}
log.Debug().Msg("api keys loaded from api_keys.json")
return nil
}
return handler
}
func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing external_backends.json")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileBackends map[string]string
err := json.Unmarshal(fileContent, &fileBackends)
if err != nil {
return err
}
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
if err != nil {
return err
}
} else {
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
}
log.Debug().Msg("external backends loaded from external_backends.json")
return nil
}
return handler
}
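The TODO above anticipates exporting this registry so other packages can add their own dynamic config files. A purely hypothetical sketch of what that could look like — `model_aliases.json` and its shape are invented here for illustration and are not part of this changeset; the function assumes it lives in this package and reuses its existing `json` and `log` imports:

```go
// registerModelAliasesJson is a hypothetical example of wiring an extra
// dynamic config file into the handler registry. runNow=true applies the
// file once at registration time and again on every change event.
func registerModelAliasesJson(c *configFileHandler) error {
	return c.Register("model_aliases.json", func(fileContent []byte, appConfig *config.ApplicationConfig) error {
		if len(fileContent) == 0 {
			// File absent or emptied: leave the startup configuration untouched.
			return nil
		}
		var aliases map[string]string
		if err := json.Unmarshal(fileContent, &aliases); err != nil {
			return err
		}
		// A real handler would fold these into appConfig; here we only log them.
		log.Debug().Int("aliases", len(aliases)).Msg("model_aliases.json loaded")
		return nil
	}, true)
}
```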

View File

@@ -10,18 +10,12 @@ import (
"github.com/go-skynet/LocalAI/pkg/assets"
"github.com/go-skynet/LocalAI/pkg/model"
pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
options := config.NewApplicationConfig(opts...)
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if options.Debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
@@ -125,6 +119,11 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
}()
}
// Watch the configuration directory
// If the directory does not exist, we don't watch it
configHandler := newConfigFileHandler(options)
configHandler.Watch()
log.Info().Msg("core/startup process completed!")
return cl, ml, options, nil
}


@@ -402,6 +402,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
| --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | $LOCALAI_CONFIG_DIR |
| --localai-config-dir-poll-interval | | Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to a time duration to poll the LocalAI Config Dir (example: 1m) | $LOCALAI_CONFIG_DIR_POLL_INTERVAL |
| --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE |
#### Models Flags


@@ -12,7 +12,7 @@ Section under construction
This section contains instruction on how to use LocalAI with GPU acceleration.
{{% alert icon="⚡" context="warning" %}}
For accelleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
Acceleration for AMD or Metal HW is still in development; for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
{{% /alert %}}
@@ -110,6 +110,143 @@ llama_model_load_internal: total VRAM used: 1598 MB
llama_init_from_file: kv self size = 512.00 MB
```
## ROCM(AMD) acceleration
There are a limited number of tested configurations for ROCm systems; however, most newer dedicated consumer-grade GPU devices appear to be supported under the current ROCm 6 implementation.
Due to the nature of ROCm, it is best to run all implementations in containers, as this limits the number of packages required on the host system. Compatibility and package versions for dependencies across all OS variations must be tested independently if desired; please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
### Requirements
- `ROCm 6.x.x` compatible GPU/accelerator
- OS: `Ubuntu` (22.04, 20.04), `RHEL` (9.3, 9.2, 8.9, 8.8), `SLES` (15.5, 15.4)
- Installed to host: `amdgpu-dkms` and `rocm` >=6.0.0 as per ROCm documentation.
### Recommendations
- Do not use on a system running Wayland.
- If running with Xorg, do not use the GPU assigned for compute for desktop rendering.
- Ensure at least 100GB of free space on the disk hosting the container runtime and storing images prior to installation.
### Limitations
Verification testing of ROCm compatibility with integrated backends is ongoing.
Please note the following list of verified backends and devices.
### Verified
The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`
| Backend | Verified | Devices |
| ---- | ---- | ---- |
| llama.cpp | yes | Radeon VII (gfx906) |
| diffusers | yes | Radeon VII (gfx906) |
| piper | yes | Radeon VII (gfx906) |
| whisper | no | none |
| autogptq | no | none |
| bark | no | none |
| coqui | no | none |
| transformers | no | none |
| exllama | no | none |
| exllama2 | no | none |
| mamba | no | none |
| petals | no | none |
| sentencetransformers | no | none |
| transformers-musicgen | no | none |
| vall-e-x | no | none |
| vllm | no | none |
**You can help by expanding this list.**
### System Prep
1. Check that your GPU's LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the compatibility details for [ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html) or [ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html).
3. Install your chosen versions of `amdgpu-dkms` and `rocm` (using the native package manager is recommended for any OS, as version changes are easier to apply that way if updates are required). Take care to reboot after installing `amdgpu-dkms` and before installing `rocm`; for details see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html)).
4. Deploy. Yes, it's that easy.
#### Setup Example (Docker/containerd)
The following are examples of the ROCm specific configuration elements required.
```yaml
# docker-compose.yaml
# For full functionality select a non-'core' image; version locking the image is recommended for debugging purposes.
image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
environment:
- DEBUG=true
# If your GPU is not already included in the current list of default targets, the following build details are required.
- REBUILD=true
- BUILD_TYPE=hipblas
- GPU_TARGETS=gfx906 # Example for Radeon VII
devices:
# AMD GPUs only require the following devices to be passed through to the container for offloading to occur.
- /dev/dri
- /dev/kfd
```
The same can also be executed as a `run` command for your container runtime:
```
docker run \
-e DEBUG=true \
-e REBUILD=true \
-e BUILD_TYPE=hipblas \
-e GPU_TARGETS=gfx906 \
--device /dev/dri \
--device /dev/kfd \
quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
```
Please be sure to add all other required environment variables, port forwards, etc. to your `compose` file or `run` command.
The rebuild process will take some time to complete when deploying these containers, and it is recommended that you `pull` the image prior to deployment; depending on the version, these images may be ~20GB in size.
#### Example (k8s) (Advanced Deployment/WIP)
For k8s deployments there is an additional step required before deployment: installing the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu).
For any k8s environment, following the documentation provided by AMD for the ROCm project should be sufficient. If you use rke2 or OpenShift, it is recommended that you deploy the SUSE or Red Hat provided version of this resource to ensure compatibility.
After this has been completed, the [helm chart from go-skynet](https://github.com/go-skynet/helm-charts) can be configured and deployed mostly unedited.
The following are details of the changes that should be made to ensure proper function.
While these details may be configurable in the `values.yaml`, development of this Helm chart is ongoing and subject to change.
The following details indicate the final state of the LocalAI deployment relevant to GPU function.
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: {NAME}-local-ai
...
spec:
...
template:
...
spec:
containers:
- env:
- name: HIP_VISIBLE_DEVICES
value: '0'
# This variable indicates the devices available to the container (0:device1 1:device2 2:device3) etc.
# For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
# Please take note of this when an iGPU is present in the host system, as compatibility is not assured.
...
resources:
limits:
amd.com/gpu: '1'
requests:
amd.com/gpu: '1'
```
This configuration has been tested on a 'custom' cluster managed by SUSE Rancher, deployed on top of Ubuntu 22.04.4; certification of other configurations is ongoing, and compatibility is not guaranteed.
### Notes
- When installing the ROCm kernel driver on your system, ensure that you are installing an equal or newer version than the one currently implemented in LocalAI (6.0.0 at the time of writing).
- AMD documentation indicates that this will ensure functionality; however, your mileage may vary depending on the GPU and distro you are using.
- If you encounter an `Error 413` when attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annotation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the helm chart.
## Intel acceleration (sycl)
### Requirements


@@ -257,6 +257,10 @@ parameters:
# swap_space: 2
# Uncomment to specify the maximum length of a sequence (including prompt and output)
# max_model_len: 32768
# Uncomment and specify the number of Tensor divisions.
# Allows you to partition and run large models. Performance gains are limited.
# https://github.com/vllm-project/vllm/issues/1435
# tensor_parallel_size: 2
```
The backend will automatically download the required files in order to run the model.
@@ -356,4 +360,4 @@ template:
completion: |
{{.Input}}
```
```


@@ -15,6 +15,7 @@ The list below is a list of software that integrates with LocalAI.
- [AnythingLLM](https://github.com/Mintplex-Labs/anything-llm)
- [Logseq GPT3 OpenAI plugin](https://github.com/briansunter/logseq-plugin-gpt3-openai) allows to set a base URL, and works with LocalAI.
- https://plugins.jetbrains.com/plugin/21056-codegpt allows for custom OpenAI compatible endpoints since 2.4.0
- [Wave Terminal](https://docs.waveterm.dev/features/supportedLLMs/localai) has native support for LocalAI!
- https://github.com/longy2k/obsidian-bmo-chatbot
- https://github.com/FlowiseAI/Flowise
- https://github.com/k8sgpt-ai/k8sgpt


@@ -56,7 +56,7 @@ icon = "info"
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is maintained by [mudler](https://github.com/mudler).
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that is compatible with the OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families and architectures. It does not require a GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
## Start LocalAI


@@ -9,13 +9,14 @@ All-In-One images are images that come pre-configured with a set of models and b
In the AIO images, models are configured with the names of OpenAI models; however, they are actually backed by Open Source models. You can find the mapping in the table below:
| Category | Model name | Real model |
| Text Generation | `gpt-4` | `phi-2`(CPU) or `hermes-2-pro-mistral`(GPU) |
| Multimodal | `gpt-4-vision-preview` | `bakllava`(CPU) or `llava-1.6-mistral`(GPU) |
| Text generation | `stablediffusion` | `stablediffusion`(CPU) `dreamshaper-8` (GPU) |
| Audio transcription | `whisper-1` | `whisper` with the `whisper-base` model |
| Text to Audio | `tts-1` | the `en-us-amy-low.onnx` model with `rhasspy` |
| Embeddings | `text-embedding-ada-002` | |
| Category | Model name | Real model (CPU) | Real model (GPU) |
| ---- | ---- | ---- | ---- |
| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` |
| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` |
| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` |
| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same |
| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same |
| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` |
## Usage


@@ -6,15 +6,22 @@ parameters:
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}<tool_call>{{end}}
{{- if eq .RoleName "tool" }}<tool_result>{{end }}
{{- if .Content}}
{{.Content}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
{{- if .FunctionCall }}</tool_call>{{end }}
{{- if eq .RoleName "tool" }}</tool_result>{{end }}
<|im_end|>
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -29,8 +36,7 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>


@@ -0,0 +1,48 @@
name: llama3-8b-instruct
mmap: true
parameters:
model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Function call:
chat: |
<|begin_of_text|>{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama3-8b-instruct",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'


@@ -1,4 +1,4 @@
aiohttp==3.9.2
aiohttp==3.9.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
@@ -20,7 +20,7 @@ numpy==1.24.3
openai==0.27.6
openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
pydantic==1.10.13
PyYAML==6.0
requests==2.31.0
SQLAlchemy==2.0.12


@@ -0,0 +1,11 @@
name: "bert-embeddings"
config_file: |
parameters:
model: bert-MiniLM-L6-v2q4_0.bin
backend: bert-embeddings
embeddings: true
files:
- filename: "bert-MiniLM-L6-v2q4_0.bin"
sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"

19
gallery/cerbero.yaml Normal file

@@ -0,0 +1,19 @@
config_file: |
backend: llama-cpp
context_size: 8192
f16: false
name: cerbero
template:
completion: "{{.Input}}"
chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] "
roles:
user: "[|Umano|] "
system: "[|Umano|] "
assistant: "[|Assistente|] "
stopwords:
- "[|Umano|]"
trimsuffix:
- "\n"

7
gallery/codellama.yaml Normal file

@@ -0,0 +1,7 @@
name: "codellama"
config_file: |
backend: llama-cpp
context_size: 4096
f16: true
mmap: true

13
gallery/dreamshaper.yaml Normal file

@@ -0,0 +1,13 @@
name: "dreamshaper"
config_file: |
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"


@@ -0,0 +1,55 @@
name: "hermes-2-pro-mistral"
config_file: |
mmap: true
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
chat: |
{{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
- "\n\n\n"

1288
gallery/index.yaml Normal file

File diff suppressed because it is too large


@@ -0,0 +1,43 @@
name: "llama3-instruct"
config_file: |
mmap: true
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Function call:
chat: |
<|begin_of_text|>{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"

19
gallery/llava.yaml Normal file

@@ -0,0 +1,19 @@
name: "llava"
config_file: |
backend: llama-cpp
context_size: 4096
f16: true
mmap: true
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:

53
gallery/noromaid.yaml Normal file

@@ -0,0 +1,53 @@
config_file: |
mmap: true
backend: llama-cpp
template:
chat_message: |
<|im_{{if eq .RoleName "assistant"}}bot{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}|>
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_system|>
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_bot|>
<tool_call>
chat: |
{{.Input -}}
<|im_bot|>
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
- "\n\n\n"

2
gallery/parler-tts.yaml Normal file

@@ -0,0 +1,2 @@
config_file: |
backend: parler-tts

19
gallery/phi-2-chat.yaml Normal file

@@ -0,0 +1,19 @@
name: "phi-2-chatml"
config_file: |
mmap: true
template:
chat_message: |
<|im_start|>{{ .RoleName }}
{{.Content}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>

18
gallery/phi-2-orange.yaml Normal file

@@ -0,0 +1,18 @@
name: "phi-2-orange"
config_file: |
mmap: true
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>

18
gallery/phi-3-chat.yaml Normal file

@@ -0,0 +1,18 @@
name: "phi-3-chat"
config_file: |
mmap: true
template:
chat_message: |
<|{{ .RoleName }}|>
{{.Content}}<|end|>
chat: |
{{.Input}}
<|assistant|>
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|end|>

2
gallery/piper.yaml Normal file

@@ -0,0 +1,2 @@
config_file: |
backend: piper

2
gallery/rerankers.yaml Normal file

@@ -0,0 +1,2 @@
config_file: |
backend: rerankers


@@ -0,0 +1,4 @@
name: "sentencetransformers"
config_file: |
backend: sentencetransformers


@@ -0,0 +1,48 @@
name: "stablediffusion-cpp"
config_file: |
name: stablediffusion-cpp
backend: stablediffusion
parameters:
model: stablediffusion_assets
files:
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- filename: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
- filename: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"

36
gallery/tinydream.yaml Normal file

@@ -0,0 +1,36 @@
name: "tinydream"
config_file: |
name: tinydream
backend: tinydream
parameters:
model: tinydream_assets
files:
- filename: "tinydream_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin"
- filename: "tinydream_assets/AutoencoderKL-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param"
- filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin"
- filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param"
- filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin"
sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin"
- filename: "tinydream_assets/RealESRGAN_x4plus_anime.param"
sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param"
- filename: "tinydream_assets/UNetModel-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin"
- filename: "tinydream_assets/UNetModel-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param"
- filename: "tinydream_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt"

21
gallery/vicuna-chat.yaml Normal file

@@ -0,0 +1,21 @@
name: "vicuna-chat"
description: |
Vicuna chat
license: "LLaMA"
config_file: |
backend: llama-cpp
context_size: 4096
roles:
user: "User: "
system: "System: "
assistant: "Assistant: "
f16: true
template:
completion: |
Complete the following sentence: {{.Input}}
chat: |
{{.Input}}
ASSISTANT:

6
gallery/virtual.yaml Normal file

@@ -0,0 +1,6 @@
name: "virtual"
description: |
A Base model definition
license: "N/A"

12
gallery/whisper-base.yaml Normal file

@@ -0,0 +1,12 @@
name: "whisper-base"
config_file: |
backend: whisper
parameters:
model: ggml-whisper-base.bin
files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

8
go.mod

@@ -1,6 +1,8 @@
module github.com/go-skynet/LocalAI
go 1.21
go 1.21.1
toolchain go1.22.2
require (
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf
@@ -29,7 +31,7 @@ require (
github.com/otiai10/openaigo v1.6.0
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
github.com/prometheus/client_golang v1.17.0
github.com/rs/zerolog v1.31.0
github.com/rs/zerolog v1.32.0
github.com/russross/blackfriday v1.6.0
github.com/sashabaranov/go-openai v1.20.4
github.com/schollz/progressbar/v3 v3.13.1
@@ -71,6 +73,7 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chasefleming/elem-go v0.25.0 // indirect
github.com/containerd/continuity v0.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.8.1 // indirect
@@ -145,6 +148,7 @@ require (
github.com/go-audio/riff v1.0.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/gofiber/contrib/fiberzerolog v1.0.0
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect

6
go.sum

@@ -37,6 +37,8 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng=
github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps=
github.com/chasefleming/elem-go v0.25.0 h1:LYzr1auk39Bh3bdKloArOFV7sOBnOfSOKxsg58eWL0Q=
github.com/chasefleming/elem-go v0.25.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f6vg71RUilJAA4=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
@@ -100,6 +102,8 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/contrib/fiberzerolog v1.0.0 h1:IB8q+NO2zPNS4VHKde1x5DqtMJ5vGrvDCydnAjlFw3E=
github.com/gofiber/contrib/fiberzerolog v1.0.0/go.mod h1:SOi+Wo7RQlO/HV0jsYTu6uFQy+8ZPTzCZW4fDEKD3l8=
github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM=
github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
@@ -281,6 +285,8 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0=
github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg=
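The gofiber/contrib/fiberzerolog dependency added to go.mod and go.sum above is typically used to route Fiber's per-request logging through zerolog. A minimal stand-alone sketch of that wiring, based on the library's documented API rather than on how LocalAI registers it:

package main

import (
	"os"

	"github.com/gofiber/contrib/fiberzerolog"
	"github.com/gofiber/fiber/v2"
	"github.com/rs/zerolog"
)

func main() {
	logger := zerolog.New(os.Stderr).With().Timestamp().Logger()

	app := fiber.New()
	// Emit one structured zerolog event per handled HTTP request.
	app.Use(fiberzerolog.New(fiberzerolog.Config{
		Logger: &logger,
	}))

	app.Get("/healthz", func(c *fiber.Ctx) error { return c.SendString("ok") })
	_ = app.Listen(":8080")
}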

12
main.go

@@ -72,6 +72,7 @@ Version: ${version}
kong.Vars{
"basepath": kong.ExpandPath("."),
"remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml",
"galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]`,
"version": internal.PrintableVersion(),
},
)
@@ -91,17 +92,20 @@ Version: ${version}
switch *cli.CLI.LogLevel {
case "error":
log.Info().Msg("Setting logging to error")
zerolog.SetGlobalLevel(zerolog.ErrorLevel)
log.Info().Msg("Setting logging to error")
case "warn":
log.Info().Msg("Setting logging to warn")
zerolog.SetGlobalLevel(zerolog.WarnLevel)
log.Info().Msg("Setting logging to warn")
case "info":
log.Info().Msg("Setting logging to info")
zerolog.SetGlobalLevel(zerolog.InfoLevel)
log.Info().Msg("Setting logging to info")
case "debug":
log.Info().Msg("Setting logging to debug")
zerolog.SetGlobalLevel(zerolog.DebugLevel)
log.Debug().Msg("Setting logging to debug")
case "trace":
zerolog.SetGlobalLevel(zerolog.TraceLevel)
log.Trace().Msg("Setting logging to trace")
}
// Populate the application with the embedded backend assets
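The switch above sets zerolog's global level from the CLI flag; any event below that level is dropped once it takes effect, which is why the placement of the "Setting logging to …" messages relative to SetGlobalLevel matters. A small stand-alone sketch of that behaviour (not LocalAI code):

package main

import (
	"os"

	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
)

func main() {
	log.Logger = zerolog.New(os.Stderr).With().Timestamp().Logger()

	zerolog.SetGlobalLevel(zerolog.WarnLevel)
	log.Info().Msg("dropped: below the global level")
	log.Warn().Msg("printed: at or above the global level")
}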


@@ -5,6 +5,8 @@ import "hash"
type progressWriter struct {
fileName string
total int64
fileNo int
totalFiles int
written int64
downloadStatus func(string, string, string, float64)
hash hash.Hash
@@ -16,6 +18,17 @@ func (pw *progressWriter) Write(p []byte) (n int, err error) {
if pw.total > 0 {
percentage := float64(pw.written) / float64(pw.total) * 100
if pw.totalFiles > 1 {
// This is a multi-file download, so we adjust the percentage
// to reflect the progress of the whole download.
// This is file pw.fileNo of pw.totalFiles files. We assume that
// the files before it were downloaded successfully.
percentage = percentage / float64(pw.totalFiles)
if pw.fileNo > 1 {
percentage += float64(pw.fileNo-1) * 100 / float64(pw.totalFiles)
}
}
//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
} else {

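The multi-file adjustment above scales the current file's percentage by the number of files and adds a full share for each file already completed; for example, with 3 files and file 2 at 50%, the overall figure is 50/3 + 100/3, i.e. 50%. A tiny stand-alone sketch of the same arithmetic (not LocalAI code):

package main

import "fmt"

// overallPercentage mirrors the adjustment above: filePct is the progress of
// the current file, which is file fileNo of totalFiles; earlier files are
// assumed to be fully downloaded.
func overallPercentage(filePct float64, fileNo, totalFiles int) float64 {
	p := filePct / float64(totalFiles)
	if fileNo > 1 {
		p += float64(fileNo-1) * 100 / float64(totalFiles)
	}
	return p
}

func main() {
	fmt.Printf("%.1f%%\n", overallPercentage(50, 2, 3)) // 50.0%
}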
Some files were not shown because too many files have changed in this diff.