chore: ⬆️ Update ggerganov/llama.cpp to ba8a1f9c5b675459c55a83e3f97f10df3a66c788 (#4575)

⬆️ Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
chore(model gallery): add phi-3.5-moe-instruct (#4574)
2026-02-03 11:13:31 -05:00 · 2025-01-10 21:45:11 +00:00 · 2025-01-10 16:59:34 +01:00 · 2025-01-10 16:27:22 +01:00 · 2025-01-10 16:26:20 +01:00 · 2025-01-10 09:20:33 +01:00
114 changed files with 3897 additions and 1107 deletions
--- a/.env
+++ b/.env
@@ -82,6 +82,15 @@
 # Enable to allow p2p mode
 # LOCALAI_P2P=true

+# Enable to use federated mode
+# LOCALAI_FEDERATED=true
+
+# Enable to start federation server
+# FEDERATED_SERVER=true
+
+# Define to use federation token
+# TOKEN=""
+
 ### Watchdog settings
 ###
 # Enables watchdog to kill backends that are inactive for too much time
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -5,6 +5,10 @@ dependencies:
 - any:
  - changed-files:
    - any-glob-to-any-file: 'Makefile'
+  - changed-files:
+    - any-glob-to-any-file: '*.mod'
+  - changed-files:
+    - any-glob-to-any-file: '*.sum'

 kind/documentation:
 - any:
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -280,6 +280,7 @@ jobs:
      makeflags: ${{ matrix.makeflags }}
      latest-image: ${{ matrix.latest-image }}
      latest-image-aio: ${{ matrix.latest-image-aio }}
+      skip-drivers: ${{ matrix.skip-drivers }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -301,6 +302,7 @@ jobs:
            latest-image: 'latest-cpu'
            latest-image-aio: 'latest-aio-cpu'
            makeflags: "--jobs=4 --output-sync=target"
+            skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -312,6 +314,7 @@ jobs:
            base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
+            skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
@@ -323,6 +326,7 @@ jobs:
            base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
+            skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -334,6 +338,7 @@ jobs:
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
+            skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
@@ -344,6 +349,7 @@ jobs:
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
@@ -354,4 +360,45 @@ jobs:
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
+            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
+#  parallel-builds:
+#    uses: ./.github/workflows/image_build.yml
+#    with:
+#      tag-latest: ${{ matrix.tag-latest }}
+#      tag-suffix: ${{ matrix.tag-suffix }}
+#      ffmpeg: ${{ matrix.ffmpeg }}
+#      image-type: ${{ matrix.image-type }}
+#      build-type: ${{ matrix.build-type }}
+#      cuda-major-version: ${{ matrix.cuda-major-version }}
+#      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+#      platforms: ${{ matrix.platforms }}
+#      runs-on: ${{ matrix.runs-on }}
+#      aio: ${{ matrix.aio }}
+#      base-image: ${{ matrix.base-image }}
+#      grpc-base-image: ${{ matrix.grpc-base-image }}
+#      makeflags: ${{ matrix.makeflags }}
+#      latest-image: ${{ matrix.latest-image }}
+#      latest-image-aio: ${{ matrix.latest-image-aio }}
+#      skip-drivers: ${{ matrix.skip-drivers }}
+#    secrets:
+#      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+#      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+#      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+#      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+#    strategy:
+#      matrix:
+#        include:
+#          - build-type: 'cublas'
+#            cuda-major-version: "12"
+#            cuda-minor-version: "0"
+#            platforms: 'linux/arm64'
+#            tag-latest: 'false'
+#            tag-suffix: '-nvidia-l4t-arm64-core'
+#            latest-image: 'latest-nvidia-l4t-arm64-core'
+#            ffmpeg: 'true'
+#            image-type: 'core'
+#            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+#            runs-on: 'self-hosted'
+#            makeflags: "--jobs=4 --output-sync=target"
+#            skip-drivers: 'true'
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -49,6 +49,10 @@ on:
        description: 'FFMPEG'
        default: ''
        type: string
+      skip-drivers:
+        description: 'Skip drivers by default'
+        default: 'false'
+        type: string
      image-type:
        description: 'Image type'
        default: ''
@@ -234,6 +238,7 @@ jobs:
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
+            SKIP_DRIVERS=${{ inputs.skip-drivers }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
@@ -262,6 +267,7 @@ jobs:
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
+            SKIP_DRIVERS=${{ inputs.skip-drivers }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
--- a/Dockerfile
+++ b/Dockerfile
@@ -115,12 +115,13 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
 ARG CUDA_MINOR_VERSION=0
+ARG SKIP_DRIVERS=false

 ENV BUILD_TYPE=${BUILD_TYPE}

 # Vulkan requirements
 RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "vulkan" ]; then
+    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
            software-properties-common pciutils wget gpg-agent && \
@@ -136,7 +137,7 @@ EOT

 # CuBLAS requirements
 RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "cublas" ]; then
+    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
            software-properties-common pciutils
@@ -162,7 +163,7 @@ RUN <<EOT bash
 EOT

 # If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
+RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            libclblast-dev && \
@@ -170,7 +171,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
        rm -rf /var/lib/apt/lists/* \
    ; fi

-RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
+RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            hipblas-dev \
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=cc98896db858df7aa40d0e16a505883ef196a482
+CPPLLAMA_VERSION?=ba8a1f9c5b675459c55a83e3f97f10df3a66c788

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -32,7 +32,7 @@ BARKCPP_VERSION?=v1.0.0

 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=4570715727f35e5a07a76796d823824c8f42206c
+STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a

 ONNX_VERSION?=1.20.0
 ONNX_ARCH?=x64
--- a/README.md
+++ b/README.md
@@ -126,10 +126,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl

 ## 🚀 [Features](https://localai.io/features/)

-- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
+- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
 - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
-- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
+- 🎨 [Image generation](https://localai.io/features/image-generation)
 - 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/) 
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
@@ -137,6 +137,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
 - 📈 [Reranker API](https://localai.io/features/reranker/)
 - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
+- 🔊 Voice activity detection (Silero-VAD support)
 - 🌍 Integrated WebUI!

 ## 💻 Usage
@@ -159,6 +160,7 @@ Model galleries
 Other:
 - Helm chart https://github.com/go-skynet/helm-charts
 - VSCode extension https://github.com/badgooooor/localai-vscode-plugin
+- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
 - Terminal utility https://github.com/djcopley/ShellOracle
 - Local Smart assistant https://github.com/mudler/LocalAGI
 - Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -242,6 +242,9 @@ message ModelOptions {
  repeated float LoraScales = 61;

  repeated string Options = 62;
+
+  string CacheTypeKey = 63;
+  string CacheTypeValue = 64;
 }

 message Result {
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -492,8 +492,8 @@ struct llama_server_context
        }

        common_init_result common_init = common_init_from_params(params);
-        model = common_init.model;
-        ctx = common_init.context;
+        model = common_init.model.release();
+        ctx = common_init.context.release();
        if (model == nullptr)
        {
            LOG_ERR("unable to load model: %s", params.model.c_str());
@@ -681,7 +681,6 @@ struct llama_server_context
        slot->sparams.mirostat          = json_value(data, "mirostat",          default_sparams.mirostat);
        slot->sparams.mirostat_tau      = json_value(data, "mirostat_tau",      default_sparams.mirostat_tau);
        slot->sparams.mirostat_eta      = json_value(data, "mirostat_eta",      default_sparams.mirostat_eta);
-        slot->sparams.penalize_nl       = json_value(data, "penalize_nl",       default_sparams.penalize_nl);
        slot->params.n_keep             = json_value(data, "n_keep",            slot->params.n_keep);
        slot->sparams.seed               = json_value(data, "seed",              default_sparams.seed);
        slot->sparams.grammar           = json_value(data, "grammar",           default_sparams.grammar);
@@ -1213,13 +1212,12 @@ struct llama_server_context
            {"mirostat",          slot.sparams.mirostat},
            {"mirostat_tau",      slot.sparams.mirostat_tau},
            {"mirostat_eta",      slot.sparams.mirostat_eta},
-            {"penalize_nl",       slot.sparams.penalize_nl},
            {"stop",              slot.params.antiprompt},
            {"n_predict",         slot.params.n_predict},
            {"n_keep",            params.n_keep},
            {"ignore_eos",        slot.sparams.ignore_eos},
            {"stream",            slot.params.stream},
-      //      {"logit_bias",        slot.sparams.logit_bias},
+             //      {"logit_bias",        slot.sparams.logit_bias},
            {"n_probs",           slot.sparams.n_probs},
            {"min_keep",          slot.sparams.min_keep},
            {"grammar",           slot.sparams.grammar},
@@ -2112,7 +2110,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
    //     slot->sparams.mirostat        = json_value(data, "mirostat",          default_sparams.mirostat);
    //     slot->sparams.mirostat_tau    = json_value(data, "mirostat_tau",      default_sparams.mirostat_tau);
    //     slot->sparams.mirostat_eta    = json_value(data, "mirostat_eta",      default_sparams.mirostat_eta);
-    //     slot->sparams.penalize_nl     = json_value(data, "penalize_nl",       default_sparams.penalize_nl);
    //     slot->params.n_keep           = json_value(data, "n_keep",            slot->params.n_keep);
    //     slot->params.seed             = json_value(data, "seed",              default_params.seed);
    //     slot->sparams.grammar         = json_value(data, "grammar",           default_sparams.grammar);
@@ -2135,7 +2132,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
    data["mirostat"] = predict->mirostat();
    data["mirostat_tau"] = predict->mirostattau();
    data["mirostat_eta"] = predict->mirostateta();
-    data["penalize_nl"] = predict->penalizenl();
    data["n_keep"] = predict->nkeep();
    data["seed"] = predict->seed();
    data["grammar"] = predict->grammar();
@@ -2181,7 +2177,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 //     llama.params.sparams.mirostat = predict->mirostat();
 //     llama.params.sparams.mirostat_tau = predict->mirostattau();
 //     llama.params.sparams.mirostat_eta = predict->mirostateta();
-//     llama.params.sparams.penalize_nl = predict->penalizenl();
 //     llama.params.n_keep = predict->nkeep();
 //     llama.params.seed = predict->seed();
 //     llama.params.sparams.grammar = predict->grammar();
@@ -2228,6 +2223,35 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 //     }
 // }

+const std::vector<ggml_type> kv_cache_types = {  // ggml types that kv_cache_type_from_str accepts; anything else is rejected there
+    GGML_TYPE_F32,
+    GGML_TYPE_F16,
+    GGML_TYPE_BF16,
+    GGML_TYPE_Q8_0,
+    GGML_TYPE_Q4_0,
+    GGML_TYPE_Q4_1,
+    GGML_TYPE_IQ4_NL,
+    GGML_TYPE_Q5_0,
+    GGML_TYPE_Q5_1,
+};
+
+static ggml_type kv_cache_type_from_str(const std::string & s) {  // returns the kv_cache_types entry whose ggml_type_name equals s
+    for (const auto & type : kv_cache_types) {
+        if (ggml_type_name(type) == s) {  // exact string comparison against the name ggml reports for this type
+            return type;
+        }
+    }
+    throw std::runtime_error("Unsupported cache type: " + s);  // reached only when no listed type's name equals s
+}
+
+static std::string get_all_kv_cache_types() {  // builds one string listing the ggml_type_name of every kv_cache_types entry
+    std::ostringstream msg;
+    for (const auto & type : kv_cache_types) {
+        msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");  // ", " after every entry except the last (identified by address)
+    }
+    return msg.str();
+}
+
 static void params_parse(const backend::ModelOptions* request,
                                common_params & params) {
   
@@ -2241,6 +2265,12 @@ static void params_parse(const backend::ModelOptions* request,
    }
    //  params.model_alias ??
    params.model_alias =  request->modelfile();
+    if (!request->cachetypekey().empty()) {
+        params.cache_type_k = kv_cache_type_from_str(request->cachetypekey());
+    }
+    if (!request->cachetypevalue().empty()) {
+        params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue());
+    }
    params.n_ctx = request->contextsize();
    //params.memory_f16 = request->f16memory();
    params.cpuparams.n_threads = request->threads();
--- a/backend/cpp/llama/patches/01-llava.patch
+++ b/backend/cpp/llama/patches/01-llava.patch
@@ -1,13 +1,13 @@
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index 342042ff..224db9b5 100644
+index 3cd0d2fa..6c5e811a 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
-@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
-             struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
-             int* patches_data = (int*)malloc(ggml_nbytes(patches));
-             for (int i = 0; i < num_patches; i++) {
-                patches_data[i] = i + 1;
-+                patches_data[i] = i;
-             }
-             ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
-             free(patches_data);
+@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
+                 struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
+                 int* patches_data = (int*)malloc(ggml_nbytes(patches));
+                 for (int i = 0; i < num_patches; i++) {
+-                    patches_data[i] = i + 1;
++                    patches_data[i] = i;
+                 }
+                 ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
+                 free(patches_data);
--- a/backend/python/autogptq/requirements-intel.txt
+++ b/backend/python/autogptq/requirements-intel.txt
@@ -1,5 +1,6 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
 transformers
--- a/backend/python/bark/requirements-intel.txt
+++ b/backend/python/bark/requirements-intel.txt
@@ -1,8 +1,9 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
-torchaudio
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+torchaudio==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
 transformers
 accelerate
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
--- a/backend/python/common/libbackend.sh
+++ b/backend/python/common/libbackend.sh
@@ -17,6 +17,9 @@
 # LIMIT_TARGETS="cublas12"
 # source $(dirname $0)/../common/libbackend.sh
 #
+
+PYTHON_VERSION="3.10"
+
 function init() {
    # Name of the backend (directory name)
    BACKEND_NAME=${PWD##*/}
@@ -88,7 +91,7 @@ function getBuildProfile() {
 # always result in an activated virtual environment
 function ensureVenv() {
    if [ ! -d "${EDIR}/venv" ]; then
-        uv venv ${EDIR}/venv
+        uv venv --python ${PYTHON_VERSION} ${EDIR}/venv
        echo "virtualenv created"
    fi

--- a/backend/python/common/template/requirements-intel.txt
+++ b/backend/python/common/template/requirements-intel.txt
@@ -1,4 +1,5 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 grpcio-tools
--- a/backend/python/coqui/requirements-intel.txt
+++ b/backend/python/coqui/requirements-intel.txt
@@ -1,9 +1,10 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
-torchaudio
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+torchaudio==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
 transformers
 accelerate
 coqui-tts
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
 packaging==24.1
--- a/backend/python/diffusers/requirements-intel.txt
+++ b/backend/python/diffusers/requirements-intel.txt
@@ -1,9 +1,10 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
-torchvision
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+torchvision==0.18.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
 diffusers
 opencv-python
 transformers
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.68.1
+grpcio==1.69.0
 pillow
 protobuf
 certifi
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
 wheel
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
--- a/backend/python/openvoice/requirements-cpu.txt
+++ b/backend/python/openvoice/requirements-cpu.txt
@@ -1,3 +1,7 @@
 torch==2.4.1
 git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
+git+https://github.com/myshell-ai/OpenVoice.git
+whisper-timestamped
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
--- a/backend/python/openvoice/requirements-cublas11.txt
+++ b/backend/python/openvoice/requirements-cublas11.txt
@@ -1,4 +1,8 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 torch==2.4.1+cu118
 git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
+git+https://github.com/myshell-ai/OpenVoice.git
+whisper-timestamped
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
--- a/backend/python/openvoice/requirements-cublas12.txt
+++ b/backend/python/openvoice/requirements-cublas12.txt
@@ -1,3 +1,7 @@
 torch==2.4.1
 git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
+git+https://github.com/myshell-ai/OpenVoice.git
+whisper-timestamped
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
--- a/backend/python/openvoice/requirements-hipblas.txt
+++ b/backend/python/openvoice/requirements-hipblas.txt
@@ -1,4 +1,8 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch==2.4.1+rocm6.0
 git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
+git+https://github.com/myshell-ai/OpenVoice.git
+whisper-timestamped
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -1,14 +1,15 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+torchaudio==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 librosa==0.9.1
 faster-whisper==0.9.0
 pydub==0.25.1
 wavmark==0.0.3
-numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect==7.0.0
 unidecode==1.3.7
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -1,20 +1,17 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 librosa
 faster-whisper
-pydub==0.25.1
-wavmark==0.0.3
-numpy==1.22.0
-eng_to_ipa==0.0.2
 inflect
 unidecode
-whisper-timestamped
 openai
 python-dotenv
 pypinyin
 cn2an==0.5.22
+numpy==1.22.0
 networkx==2.8.8
 jieba==0.42.1
-gradio==3.48.0
+gradio==5.9.1
 langid==1.1.6
 llvmlite==0.43.0
+setuptools
--- a/backend/python/parler-tts/requirements-intel.txt
+++ b/backend/python/parler-tts/requirements-intel.txt
@@ -1,8 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
-torchaudio
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+torchaudio==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -1,3 +1,4 @@
-grpcio==1.68.1
+grpcio==1.69.0
 certifi
 llvmlite==0.43.0
+setuptools
--- a/backend/python/rerankers/requirements-intel.txt
+++ b/backend/python/rerankers/requirements-intel.txt
@@ -1,8 +1,9 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
+intel-extension-for-pytorch==2.3.110+xpu
 transformers
 accelerate
-torch
+torch==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 rerankers[transformers]
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
--- a/backend/python/sentencetransformers/requirements-intel.txt
+++ b/backend/python/sentencetransformers/requirements-intel.txt
@@ -1,8 +1,9 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
 accelerate
 sentence-transformers==3.3.1
 transformers
--- a/backend/python/sentencetransformers/requirements.txt
+++ b/backend/python/sentencetransformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
 datasets
--- a/backend/python/transformers-musicgen/requirements-intel.txt
+++ b/backend/python/transformers-musicgen/requirements-intel.txt
@@ -1,7 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
+intel-extension-for-pytorch==2.3.110+xpu
 transformers
+oneccl_bind_pt==2.3.100+xpu
 accelerate
-torch
+torch==2.3.1+cxx11.abi
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 scipy==1.14.0
 certifi
--- a/backend/python/transformers/requirements-intel.txt
+++ b/backend/python/transformers/requirements-intel.txt
@@ -1,6 +1,7 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
 intel-extension-for-transformers
 bitsandbytes
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
--- a/backend/python/vall-e-x/requirements-intel.txt
+++ b/backend/python/vall-e-x/requirements-intel.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
+intel-extension-for-pytorch==2.3.110+xpu
 accelerate
-torch
-torchaudio
+torch==2.3.1+cxx11.abi
+torchaudio==2.3.1+cxx11.abi
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+oneccl_bind_pt==2.3.100+xpu
--- a/backend/python/vall-e-x/requirements.txt
+++ b/backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,4 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
-certifi
+certifi
+setuptools
--- a/backend/python/vllm/requirements-intel.txt
+++ b/backend/python/vllm/requirements-intel.txt
@@ -1,8 +1,9 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
+intel-extension-for-pytorch==2.3.110+xpu
 accelerate
-torch
+torch==2.3.1+cxx11.abi
 transformers
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
-bitsandbytes
+setuptools
+bitsandbytes
+oneccl_bind_pt==2.3.100+xpu
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.69.0
 protobuf
 certifi
 setuptools
--- a/core/application.go
+++ b/core/application.go
@@ -1,38 +0,0 @@
-package core
-
-import (
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/services"
-	"github.com/mudler/LocalAI/pkg/model"
-)
-
-// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
-// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
-type Application struct {
-
-	// Application-Level Config
-	ApplicationConfig *config.ApplicationConfig
-	// ApplicationState *ApplicationState
-
-	// Core Low-Level Services
-	BackendConfigLoader *config.BackendConfigLoader
-	ModelLoader         *model.ModelLoader
-
-	// Backend Services
-	// EmbeddingsBackendService      *backend.EmbeddingsBackendService
-	// ImageGenerationBackendService *backend.ImageGenerationBackendService
-	// LLMBackendService             *backend.LLMBackendService
-	// TranscriptionBackendService *backend.TranscriptionBackendService
-	// TextToSpeechBackendService  *backend.TextToSpeechBackendService
-
-	// LocalAI System Services
-	BackendMonitorService *services.BackendMonitorService
-	GalleryService        *services.GalleryService
-	LocalAIMetricsService *services.LocalAIMetricsService
-	// OpenAIService         *services.OpenAIService
-}
-
-// TODO [NEXT PR?]: Break up ApplicationConfig.
-// Migrate over stuff that is not set via config at all - especially runtime stuff
-type ApplicationState struct {
-}
--- a/core/application/application.go
+++ b/core/application/application.go
@@ -0,0 +1,39 @@
+package application
+
+import (
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"
+)
+
+type Application struct { // groups the loaders, configuration and template evaluator assembled by newApplication
+	backendLoader      *config.BackendConfigLoader // returned by BackendLoader()
+	modelLoader        *model.ModelLoader // returned by ModelLoader()
+	applicationConfig  *config.ApplicationConfig // returned by ApplicationConfig()
+	templatesEvaluator *templates.Evaluator // returned by TemplatesEvaluator()
+}
+
+func newApplication(appConfig *config.ApplicationConfig) *Application { // unexported constructor; every component below is created from appConfig.ModelPath
+	return &Application{
+		backendLoader:      config.NewBackendConfigLoader(appConfig.ModelPath),
+		modelLoader:        model.NewModelLoader(appConfig.ModelPath),
+		applicationConfig:  appConfig, // stored as passed in, not copied
+		templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
+	}
+}
+
+func (a *Application) BackendLoader() *config.BackendConfigLoader {
+	return a.backendLoader // the loader created in newApplication
+}
+
+func (a *Application) ModelLoader() *model.ModelLoader {
+	return a.modelLoader // the loader created in newApplication
+}
+
+func (a *Application) ApplicationConfig() *config.ApplicationConfig {
+	return a.applicationConfig // same pointer that was handed to newApplication
+}
+
+func (a *Application) TemplatesEvaluator() *templates.Evaluator {
+	return a.templatesEvaluator // the evaluator created in newApplication
+}
--- a/core/application/config_file_watcher.go
+++ b/core/application/config_file_watcher.go
@@ -1,4 +1,4 @@
-package startup
+package application

 import (
 	"encoding/json"
@@ -8,8 +8,8 @@ import (
 	"path/filepath"
 	"time"

-	"github.com/fsnotify/fsnotify"
 	"dario.cat/mergo"
+	"github.com/fsnotify/fsnotify"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/rs/zerolog/log"
 )
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -1,15 +1,15 @@
-package startup
+package application

 import (
 	"fmt"
 	"os"

-	"github.com/mudler/LocalAI/core"
 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/mudler/LocalAI/pkg/assets"
+
 	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/mudler/LocalAI/pkg/model"
 	pkgStartup "github.com/mudler/LocalAI/pkg/startup"
@@ -17,8 +17,9 @@ import (
 	"github.com/rs/zerolog/log"
 )

-func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
+func New(opts ...config.AppOption) (*Application, error) {
 	options := config.NewApplicationConfig(opts...)
+	application := newApplication(options)

 	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
 	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
@@ -36,28 +37,28 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode

 	// Make sure directories exists
 	if options.ModelPath == "" {
-		return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
+		return nil, fmt.Errorf("options.ModelPath cannot be empty")
 	}
 	err = os.MkdirAll(options.ModelPath, 0750)
 	if err != nil {
-		return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
+		return nil, fmt.Errorf("unable to create ModelPath: %q", err)
 	}
 	if options.ImageDir != "" {
 		err := os.MkdirAll(options.ImageDir, 0750)
 		if err != nil {
-			return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
+			return nil, fmt.Errorf("unable to create ImageDir: %q", err)
 		}
 	}
 	if options.AudioDir != "" {
 		err := os.MkdirAll(options.AudioDir, 0750)
 		if err != nil {
-			return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
+			return nil, fmt.Errorf("unable to create AudioDir: %q", err)
 		}
 	}
 	if options.UploadDir != "" {
 		err := os.MkdirAll(options.UploadDir, 0750)
 		if err != nil {
-			return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
+			return nil, fmt.Errorf("unable to create UploadDir: %q", err)
 		}
 	}

@@ -65,39 +66,36 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 		log.Error().Err(err).Msg("error installing models")
 	}

-	cl := config.NewBackendConfigLoader(options.ModelPath)
-	ml := model.NewModelLoader(options.ModelPath)
-
 	configLoaderOpts := options.ToConfigLoaderOptions()

-	if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
+	if err := application.BackendLoader().LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
 		log.Error().Err(err).Msg("error loading config files")
 	}

 	if options.ConfigFile != "" {
-		if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
+		if err := application.BackendLoader().LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
 			log.Error().Err(err).Msg("error loading config file")
 		}
 	}

-	if err := cl.Preload(options.ModelPath); err != nil {
+	if err := application.BackendLoader().Preload(options.ModelPath); err != nil {
 		log.Error().Err(err).Msg("error downloading models")
 	}

 	if options.PreloadJSONModels != "" {
 		if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
-			return nil, nil, nil, err
+			return nil, err
 		}
 	}

 	if options.PreloadModelsFromPath != "" {
 		if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
-			return nil, nil, nil, err
+			return nil, err
 		}
 	}

 	if options.Debug {
-		for _, v := range cl.GetAllBackendConfigs() {
+		for _, v := range application.BackendLoader().GetAllBackendConfigs() {
 			log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v)
 		}
 	}
@@ -123,7 +121,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 	go func() {
 		<-options.Context.Done()
 		log.Debug().Msgf("Context canceled, shutting down")
-		err := ml.StopAllGRPC()
+		err := application.ModelLoader().StopAllGRPC()
 		if err != nil {
 			log.Error().Err(err).Msg("error while stopping all grpc backends")
 		}
@@ -131,12 +129,12 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode

 	if options.WatchDog {
 		wd := model.NewWatchDog(
-			ml,
+			application.ModelLoader(),
 			options.WatchDogBusyTimeout,
 			options.WatchDogIdleTimeout,
 			options.WatchDogBusy,
 			options.WatchDogIdle)
-		ml.SetWatchDog(wd)
+		application.ModelLoader().SetWatchDog(wd)
 		go wd.Run()
 		go func() {
 			<-options.Context.Done()
@@ -147,7 +145,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode

 	if options.LoadToMemory != nil {
 		for _, m := range options.LoadToMemory {
-			cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath,
+			cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
 				config.LoadOptionDebug(options.Debug),
 				config.LoadOptionThreads(options.Threads),
 				config.LoadOptionContextSize(options.ContextSize),
@@ -155,7 +153,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 				config.ModelPath(options.ModelPath),
 			)
 			if err != nil {
-				return nil, nil, nil, err
+				return nil, err
 			}

 			log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
@@ -163,9 +161,9 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 			o := backend.ModelOptions(*cfg, options)

 			var backendErr error
-			_, backendErr = ml.Load(o...)
+			_, backendErr = application.ModelLoader().Load(o...)
 			if backendErr != nil {
-				return nil, nil, nil, err
+				return nil, backendErr // err is nil here (checked above); returning it would hide the load failure
 			}
 		}
 	}
@@ -174,7 +172,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 	startWatcher(options)

 	log.Info().Msg("core/startup process completed!")
-	return cl, ml, options, nil
+	return application, nil
 }

 func startWatcher(options *config.ApplicationConfig) {
@@ -201,32 +199,3 @@ func startWatcher(options *config.ApplicationConfig) {
 		log.Error().Err(err).Msg("failed creating watcher")
 	}
 }
-
-// In Lieu of a proper DI framework, this function wires up the Application manually.
-// This is in core/startup rather than core/state.go to keep package references clean!
-func createApplication(appConfig *config.ApplicationConfig) *core.Application {
-	app := &core.Application{
-		ApplicationConfig:   appConfig,
-		BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
-		ModelLoader:         model.NewModelLoader(appConfig.ModelPath),
-	}
-
-	var err error
-
-	// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
-	// app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
-	// app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
-	// app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
-	// app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
-
-	app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
-	app.GalleryService = services.NewGalleryService(app.ApplicationConfig)
-	// app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
-
-	app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
-	if err != nil {
-		log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
-	}
-
-	return app
-}
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -118,7 +118,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im

 			var partialRune []byte
 			err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) {
-				msg := reply.GetMessage()
+				msg := reply.Message
 				partialRune = append(partialRune, msg...)

 				tokenUsage.Prompt = int(reply.PromptTokens)
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -151,6 +151,8 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		TensorParallelSize:   int32(c.TensorParallelSize),
 		MMProj:               c.MMProj,
 		FlashAttention:       c.FlashAttention,
+		CacheTypeKey:         c.CacheTypeK,
+		CacheTypeValue:       c.CacheTypeV,
 		NoKVOffload:          c.NoKVOffloading,
 		YarnExtFactor:        c.YarnExtFactor,
 		YarnAttnFactor:       c.YarnAttnFactor,
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -6,12 +6,12 @@ import (
 	"strings"
 	"time"

+	"github.com/mudler/LocalAI/core/application"
 	cli_api "github.com/mudler/LocalAI/core/cli/api"
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http"
 	"github.com/mudler/LocalAI/core/p2p"
-	"github.com/mudler/LocalAI/core/startup"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
@@ -186,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 	}

 	if r.PreloadBackendOnly {
-		_, _, _, err := startup.Startup(opts...)
+		_, err := application.New(opts...)
 		return err
 	}

-	cl, ml, options, err := startup.Startup(opts...)
+	app, err := application.New(opts...)
 	if err != nil {
 		return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
 	}

-	appHTTP, err := http.App(cl, ml, options)
+	appHTTP, err := http.API(app)
 	if err != nil {
 		log.Error().Err(err).Msg("error during HTTP App construction")
 		return err
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -155,8 +155,10 @@ type LLMConfig struct {
 	TensorParallelSize   int       `yaml:"tensor_parallel_size"`   // vLLM
 	MMProj               string    `yaml:"mmproj"`

-	FlashAttention bool `yaml:"flash_attention"`
-	NoKVOffloading bool `yaml:"no_kv_offloading"`
+	FlashAttention bool   `yaml:"flash_attention"`
+	NoKVOffloading bool   `yaml:"no_kv_offloading"`
+	CacheTypeK     string `yaml:"cache_type_k"`
+	CacheTypeV     string `yaml:"cache_type_v"`

 	RopeScaling string `yaml:"rope_scaling"`
 	ModelType   string `yaml:"type"`
@@ -204,6 +206,8 @@ type TemplateConfig struct {
 	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`

 	Multimodal string `yaml:"multimodal"`
+
+	JinjaTemplate bool `yaml:"jinja_template"`
 }

 func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
--- a/core/config/guesser.go
+++ b/core/config/guesser.go
@@ -26,14 +26,14 @@ const (
 type settingsConfig struct {
 	StopWords      []string
 	TemplateConfig TemplateConfig
-	RepeatPenalty float64
+	RepeatPenalty  float64
 }

 // default settings to adopt with a given model family
 var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
 	Gemma: {
 		RepeatPenalty: 1.0,
-		StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
+		StopWords:     []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
 		TemplateConfig: TemplateConfig{
 			Chat:        "{{.Input }}\n<start_of_turn>model\n",
 			ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
@@ -200,6 +200,18 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
 	} else {
 		log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
 	}
+
+	if cfg.HasTemplate() {
+		return
+	}
+
+	// identify from well known templates first, otherwise use the raw jinja template
+	chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
+	if found {
+		// try to use the jinja template
+		cfg.TemplateConfig.JinjaTemplate = true
+		cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
+	}
 }

 func identifyFamily(f *gguf.GGUFFile) familyType {
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -14,10 +14,9 @@ import (
 	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/http/routes"

-	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/application"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
-	"github.com/mudler/LocalAI/pkg/model"

 	"github.com/gofiber/contrib/fiberzerolog"
 	"github.com/gofiber/fiber/v2"
@@ -49,18 +48,18 @@ var embedDirStatic embed.FS
 // @in header
 // @name Authorization

-func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
+func API(application *application.Application) (*fiber.App, error) {

 	fiberCfg := fiber.Config{
 		Views:     renderEngine(),
-		BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+		BodyLimit: application.ApplicationConfig().UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
 		// We disable the Fiber startup message as it does not conform to structured logging.
 		// We register a startup log line with connection information in the OnListen hook to keep things user friendly though
 		DisableStartupMessage: true,
 		// Override default error handler
 	}

-	if !appConfig.OpaqueErrors {
+	if !application.ApplicationConfig().OpaqueErrors {
 		// Normally, return errors as JSON responses
 		fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error {
 			// Status code defaults to 500
@@ -86,9 +85,11 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
 		}
 	}

-	app := fiber.New(fiberCfg)
+	router := fiber.New(fiberCfg)

-	app.Hooks().OnListen(func(listenData fiber.ListenData) error {
+	router.Use(middleware.StripPathPrefix())
+
+	router.Hooks().OnListen(func(listenData fiber.ListenData) error {
 		scheme := "http"
 		if listenData.TLS {
 			scheme = "https"
@@ -99,82 +100,82 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi

 	// Have Fiber use zerolog like the rest of the application rather than it's built-in logger
 	logger := log.Logger
-	app.Use(fiberzerolog.New(fiberzerolog.Config{
+	router.Use(fiberzerolog.New(fiberzerolog.Config{
 		Logger: &logger,
 	}))

 	// Default middleware config

-	if !appConfig.Debug {
-		app.Use(recover.New())
+	if !application.ApplicationConfig().Debug {
+		router.Use(recover.New())
 	}

-	if !appConfig.DisableMetrics {
+	if !application.ApplicationConfig().DisableMetrics {
 		metricsService, err := services.NewLocalAIMetricsService()
 		if err != nil {
 			return nil, err
 		}

 		if metricsService != nil {
-			app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
-			app.Hooks().OnShutdown(func() error {
+			router.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
+			router.Hooks().OnShutdown(func() error {
 				return metricsService.Shutdown()
 			})
 		}

 	}
 	// Health Checks should always be exempt from auth, so register these first
-	routes.HealthRoutes(app)
+	routes.HealthRoutes(router)

-	kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
+	kaConfig, err := middleware.GetKeyAuthConfig(application.ApplicationConfig())
 	if err != nil || kaConfig == nil {
 		return nil, fmt.Errorf("failed to create key auth config: %w", err)
 	}

 	// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
-	app.Use(v2keyauth.New(*kaConfig))
+	router.Use(v2keyauth.New(*kaConfig))

-	if appConfig.CORS {
+	if application.ApplicationConfig().CORS {
 		var c func(ctx *fiber.Ctx) error
-		if appConfig.CORSAllowOrigins == "" {
+		if application.ApplicationConfig().CORSAllowOrigins == "" {
 			c = cors.New()
 		} else {
-			c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
+			c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig().CORSAllowOrigins})
 		}

-		app.Use(c)
+		router.Use(c)
 	}

-	if appConfig.CSRF {
+	if application.ApplicationConfig().CSRF {
 		log.Debug().Msg("Enabling CSRF middleware. Tokens are now required for state-modifying requests")
-		app.Use(csrf.New())
+		router.Use(csrf.New())
 	}

 	// Load config jsons
-	utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
-	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
-	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
+	utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
+	utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
+	utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)

-	galleryService := services.NewGalleryService(appConfig)
-	galleryService.Start(appConfig.Context, cl)
+	galleryService := services.NewGalleryService(application.ApplicationConfig())
+	galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())

-	routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
-	routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
-	routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
-	if !appConfig.DisableWebUI {
-		routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
+	routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
+	routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
+	routes.RegisterOpenAIRoutes(router, application)
+	if !application.ApplicationConfig().DisableWebUI {
+		routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
 	}
-	routes.RegisterJINARoutes(app, cl, ml, appConfig)
+	routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())

 	httpFS := http.FS(embedDirStatic)

-	app.Use(favicon.New(favicon.Config{
+	router.Use(favicon.New(favicon.Config{
 		URL:        "/favicon.ico",
 		FileSystem: httpFS,
 		File:       "static/favicon.ico",
 	}))

-	app.Use("/static", filesystem.New(filesystem.Config{
+	router.Use("/static", filesystem.New(filesystem.Config{
 		Root:       httpFS,
 		PathPrefix: "static",
 		Browse:     true,
@@ -182,7 +183,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi

 	// Define a custom 404 handler
 	// Note: keep this at the bottom!
-	app.Use(notFoundHandler)
+	router.Use(notFoundHandler)

-	return app, nil
+	return router, nil
 }
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -12,15 +12,14 @@ import (
 	"path/filepath"
 	"runtime"

+	"github.com/mudler/LocalAI/core/application"
 	"github.com/mudler/LocalAI/core/config"
 	. "github.com/mudler/LocalAI/core/http"
 	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/startup"

 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/pkg/downloader"
-	"github.com/mudler/LocalAI/pkg/model"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v3"
@@ -238,6 +237,31 @@ func postInvalidRequest(url string) (error, int) {
 	return nil, resp.StatusCode
 }

+func getRequest(url string, header http.Header) (error, int, []byte) {
+	// Sends a GET to url with the given headers; returns (error, status code, body), status is -1 on any failure.
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return err, -1, nil
+	}
+
+	req.Header = header // replaces the request's header map wholesale with the caller's
+
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		return err, -1, nil
+	}
+
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err, -1, nil // status is discarded when the body cannot be read
+	}
+
+	return nil, resp.StatusCode, body
+}
+
 const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

 //go:embed backend-assets/*
@@ -252,9 +276,6 @@ var _ = Describe("API test", func() {
 	var cancel context.CancelFunc
 	var tmpdir string
 	var modelDir string
-	var bcl *config.BackendConfigLoader
-	var ml *model.ModelLoader
-	var applicationConfig *config.ApplicationConfig

 	commonOpts := []config.AppOption{
 		config.WithDebug(true),
@@ -300,7 +321,7 @@ var _ = Describe("API test", func() {
 				},
 			}

-			bcl, ml, applicationConfig, err = startup.Startup(
+			application, err := application.New(
 				append(commonOpts,
 					config.WithContext(c),
 					config.WithGalleries(galleries),
@@ -310,7 +331,7 @@ var _ = Describe("API test", func() {
 					config.WithBackendAssetsOutput(backendAssetsDir))...)
 			Expect(err).ToNot(HaveOccurred())

-			app, err = App(bcl, ml, applicationConfig)
+			app, err = API(application)
 			Expect(err).ToNot(HaveOccurred())

 			go app.Listen("127.0.0.1:9090")
@@ -349,6 +370,33 @@ var _ = Describe("API test", func() {
 			})
 		})

+		Context("URL routing Tests", func() {
+			It("Should support reverse-proxy when unauthenticated", func() {
+
+				err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{
+					"X-Forwarded-Proto":  {"https"},
+					"X-Forwarded-Host":   {"example.org"},
+					"X-Forwarded-Prefix": {"/myprefix/"},
+				})
+				Expect(err).To(BeNil(), "error")
+				Expect(sc).To(Equal(401), "status code")
+				Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
+			})
+
+			It("Should support reverse-proxy when authenticated", func() {
+
+				err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{
+					"Authorization":      {bearerKey},
+					"X-Forwarded-Proto":  {"https"},
+					"X-Forwarded-Host":   {"example.org"},
+					"X-Forwarded-Prefix": {"/myprefix/"},
+				})
+				Expect(err).To(BeNil(), "error")
+				Expect(sc).To(Equal(200), "status code")
+				Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
+			})
+		})
+
 		Context("Applying models", func() {

 			It("applies models from a gallery", func() {
@@ -539,7 +587,7 @@ var _ = Describe("API test", func() {
 				var res map[string]string
 				err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
 				Expect(err).ToNot(HaveOccurred())
-				Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
+				Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
 				Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
 				Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))

@@ -641,7 +689,7 @@ var _ = Describe("API test", func() {
 				},
 			}

-			bcl, ml, applicationConfig, err = startup.Startup(
+			application, err := application.New(
 				append(commonOpts,
 					config.WithContext(c),
 					config.WithAudioDir(tmpdir),
@@ -652,7 +700,7 @@ var _ = Describe("API test", func() {
 					config.WithBackendAssetsOutput(tmpdir))...,
 			)
 			Expect(err).ToNot(HaveOccurred())
-			app, err = App(bcl, ml, applicationConfig)
+			app, err = API(application)
 			Expect(err).ToNot(HaveOccurred())

 			go app.Listen("127.0.0.1:9090")
@@ -708,7 +756,7 @@ var _ = Describe("API test", func() {
 			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))

 			Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
-			Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
+			Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave")))
 		})
 		It("installs and is capable to generate images", Label("stablediffusion"), func() {
 			if runtime.GOOS != "linux" {
@@ -772,14 +820,14 @@ var _ = Describe("API test", func() {

 			var err error

-			bcl, ml, applicationConfig, err = startup.Startup(
+			application, err := application.New(
 				append(commonOpts,
 					config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
 					config.WithContext(c),
 					config.WithModelPath(modelPath),
 				)...)
 			Expect(err).ToNot(HaveOccurred())
-			app, err = App(bcl, ml, applicationConfig)
+			app, err = API(application)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")

@@ -990,14 +1038,14 @@ var _ = Describe("API test", func() {
 			c, cancel = context.WithCancel(context.Background())

 			var err error
-			bcl, ml, applicationConfig, err = startup.Startup(
+			application, err := application.New(
 				append(commonOpts,
 					config.WithContext(c),
 					config.WithModelPath(modelPath),
 					config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
 			)
 			Expect(err).ToNot(HaveOccurred())
-			app, err = App(bcl, ml, applicationConfig)
+			app, err = API(application)
 			Expect(err).ToNot(HaveOccurred())

 			go app.Listen("127.0.0.1:9090")
--- a/core/http/elements/buttons.go
+++ b/core/http/elements/buttons.go
@@ -16,7 +16,7 @@ func installButton(galleryName string) elem.Node {
 			"class":                 "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
 			"hx-swap":               "outerHTML",
 			// post the Model ID as param
-			"hx-post": "/browse/install/model/" + galleryName,
+			"hx-post": "browse/install/model/" + galleryName,
 		},
 		elem.I(
 			attrs.Props{
@@ -36,7 +36,7 @@ func reInstallButton(galleryName string) elem.Node {
 			"hx-target":             "#action-div-" + dropBadChars(galleryName),
 			"hx-swap":               "outerHTML",
 			// post the Model ID as param
-			"hx-post": "/browse/install/model/" + galleryName,
+			"hx-post": "browse/install/model/" + galleryName,
 		},
 		elem.I(
 			attrs.Props{
@@ -80,7 +80,7 @@ func deleteButton(galleryID string) elem.Node {
 			"hx-target":             "#action-div-" + dropBadChars(galleryID),
 			"hx-swap":               "outerHTML",
 			// post the Model ID as param
-			"hx-post": "/browse/delete/model/" + galleryID,
+			"hx-post": "browse/delete/model/" + galleryID,
 		},
 		elem.I(
 			attrs.Props{
--- a/core/http/elements/gallery.go
+++ b/core/http/elements/gallery.go
@@ -47,7 +47,7 @@ func searchableElement(text, icon string) elem.Node {
 					//	"value":     text,
 					//"class":     "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
 					"href":      "#!",
-					"hx-post":   "/browse/search/models",
+					"hx-post":   "browse/search/models",
 					"hx-target": "#search-results",
 					// TODO: this doesn't work
 					//	"hx-vals":      `{ \"search\": \"` + text + `\" }`,
--- a/core/http/elements/progressbar.go
+++ b/core/http/elements/progressbar.go
@@ -64,7 +64,7 @@ func StartProgressBar(uid, progress, text string) string {
 	return elem.Div(
 		attrs.Props{
 			"hx-trigger": "done",
-			"hx-get":     "/browse/job/" + uid,
+			"hx-get":     "browse/job/" + uid,
 			"hx-swap":    "outerHTML",
 			"hx-target":  "this",
 		},
@@ -77,7 +77,7 @@ func StartProgressBar(uid, progress, text string) string {
 			},
 			elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive
 			elem.Div(attrs.Props{
-				"hx-get":     "/browse/job/progress/" + uid,
+				"hx-get":     "browse/job/progress/" + uid,
 				"hx-trigger": "every 600ms",
 				"hx-target":  "this",
 				"hx-swap":    "innerHTML",
--- a/core/http/endpoints/explorer/dashboard.go
+++ b/core/http/endpoints/explorer/dashboard.go
@@ -6,6 +6,7 @@ import (

 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/explorer"
+	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/internal"
 )

@@ -14,6 +15,7 @@ func Dashboard() func(*fiber.Ctx) error {
 		summary := fiber.Map{
 			"Title":   "LocalAI API - " + internal.PrintableVersion(),
 			"Version": internal.PrintableVersion(),
+			"BaseURL": utils.BaseURL(c),
 		}

 		if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -9,6 +9,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/rs/zerolog/log"
@@ -82,7 +83,8 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
 			Galleries:        mgs.galleries,
 			ConfigURL:        input.ConfigURL,
 		}
-		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
+
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})
 	}
 }

@@ -105,7 +107,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
 			return err
 		}

-		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})
 	}
 }

--- a/core/http/endpoints/localai/welcome.go
+++ b/core/http/endpoints/localai/welcome.go
@@ -4,6 +4,7 @@ import (
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
@@ -32,6 +33,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
 		summary := fiber.Map{
 			"Title":             "LocalAI API - " + internal.PrintableVersion(),
 			"Version":           internal.PrintableVersion(),
+			"BaseURL":           utils.BaseURL(c),
 			"Models":            modelsWithoutConfig,
 			"ModelsConfig":      backendConfigs,
 			"GalleryConfig":     galleryConfigs,
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -14,6 +14,8 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/templates"
+
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
@@ -24,7 +26,7 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/chat/completions [post]
-func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	var id, textContentToReturn string
 	var created int

@@ -39,15 +41,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		responses <- initialMessage

 		ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
-			choices := []schema.Choice{}
-			if s != "" {
-				choices = append(choices, schema.Choice{Delta: &schema.Message{Content: &s}, Index: 0})
-			}
 			resp := schema.OpenAIResponse{
 				ID:      id,
 				Created: created,
 				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-				Choices: choices,
+				Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
 				Object:  "chat.completion.chunk",
 				Usage: schema.OpenAIUsage{
 					PromptTokens:     usage.Prompt,
@@ -298,148 +296,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		// If we are using the tokenizer template, we don't need to process the messages
 		// unless we are processing functions
 		if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
-			suppressConfigSystemPrompt := false
-			mess := []string{}
-			for messageIndex, i := range input.Messages {
-				var content string
-				role := i.Role
-
-				// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
-				// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
-				if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
-					roleFn := "assistant_function_call"
-					r := config.Roles[roleFn]
-					if r != "" {
-						role = roleFn
-					}
-				}
-				r := config.Roles[role]
-				contentExists := i.Content != nil && i.StringContent != ""
-
-				fcall := i.FunctionCall
-				if len(i.ToolCalls) > 0 {
-					fcall = i.ToolCalls
-				}
-
-				// First attempt to populate content via a chat message specific template
-				if config.TemplateConfig.ChatMessage != "" {
-					chatMessageData := model.ChatMessageTemplateData{
-						SystemPrompt: config.SystemPrompt,
-						Role:         r,
-						RoleName:     role,
-						Content:      i.StringContent,
-						FunctionCall: fcall,
-						FunctionName: i.Name,
-						LastMessage:  messageIndex == (len(input.Messages) - 1),
-						Function:     config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
-						MessageIndex: messageIndex,
-					}
-					templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
-					if err != nil {
-						log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
-					} else {
-						if templatedChatMessage == "" {
-							log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
-							continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
-						}
-						log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
-						content = templatedChatMessage
-					}
-				}
-
-				marshalAnyRole := func(f any) {
-					j, err := json.Marshal(f)
-					if err == nil {
-						if contentExists {
-							content += "\n" + fmt.Sprint(r, " ", string(j))
-						} else {
-							content = fmt.Sprint(r, " ", string(j))
-						}
-					}
-				}
-				marshalAny := func(f any) {
-					j, err := json.Marshal(f)
-					if err == nil {
-						if contentExists {
-							content += "\n" + string(j)
-						} else {
-							content = string(j)
-						}
-					}
-				}
-				// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
-				if content == "" {
-					if r != "" {
-						if contentExists {
-							content = fmt.Sprint(r, i.StringContent)
-						}
-
-						if i.FunctionCall != nil {
-							marshalAnyRole(i.FunctionCall)
-						}
-						if i.ToolCalls != nil {
-							marshalAnyRole(i.ToolCalls)
-						}
-					} else {
-						if contentExists {
-							content = fmt.Sprint(i.StringContent)
-						}
-						if i.FunctionCall != nil {
-							marshalAny(i.FunctionCall)
-						}
-						if i.ToolCalls != nil {
-							marshalAny(i.ToolCalls)
-						}
-					}
-					// Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
-					if contentExists && role == "system" {
-						suppressConfigSystemPrompt = true
-					}
-				}
-
-				mess = append(mess, content)
-			}
-
-			joinCharacter := "\n"
-			if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
-				joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
-			}
-
-			predInput = strings.Join(mess, joinCharacter)
-			log.Debug().Msgf("Prompt (before templating): %s", predInput)
-
-			templateFile := ""
-
-			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-			if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
-				templateFile = config.Model
-			}
-
-			if config.TemplateConfig.Chat != "" && !shouldUseFn {
-				templateFile = config.TemplateConfig.Chat
-			}
-
-			if config.TemplateConfig.Functions != "" && shouldUseFn {
-				templateFile = config.TemplateConfig.Functions
-			}
-
-			if templateFile != "" {
-				templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
-					SystemPrompt:         config.SystemPrompt,
-					SuppressSystemPrompt: suppressConfigSystemPrompt,
-					Input:                predInput,
-					Functions:            funcs,
-				})
-				if err == nil {
-					predInput = templatedInput
-					log.Debug().Msgf("Template found, input modified to: %s", predInput)
-				} else {
-					log.Debug().Msgf("Template failed loading: %s", err.Error())
-				}
-			}
+			predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn)

 			log.Debug().Msgf("Prompt (after templating): %s", predInput)
-			if shouldUseFn && config.Grammar != "" {
+			if config.Grammar != "" {
 				log.Debug().Msgf("Grammar: %+v", config.Grammar)
 			}
 		}
@@ -469,9 +329,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 				toolsCalled := false
 				for ev := range responses {
 					usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
-					if len(ev.Choices) == 0 {
-						break
-					}
 					if len(ev.Choices[0].Delta.ToolCalls) > 0 {
 						toolsCalled = true
 					}
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -16,6 +16,7 @@ import (
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
 	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
@@ -25,7 +26,7 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/completions [post]
-func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	id := uuid.New().String()
 	created := int(time.Now().Unix())

@@ -94,17 +95,6 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 			c.Set("Transfer-Encoding", "chunked")
 		}

-		templateFile := ""
-
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
-			templateFile = config.Model
-		}
-
-		if config.TemplateConfig.Completion != "" {
-			templateFile = config.TemplateConfig.Completion
-		}
-
 		if input.Stream {
 			if len(config.PromptStrings) > 1 {
 				return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
@@ -112,15 +102,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a

 			predInput := config.PromptStrings[0]

-			if templateFile != "" {
-				templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
-					Input:        predInput,
-					SystemPrompt: config.SystemPrompt,
-				})
-				if err == nil {
-					predInput = templatedInput
-					log.Debug().Msgf("Template found, input modified to: %s", predInput)
-				}
+			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
+				Input:        predInput,
+				SystemPrompt: config.SystemPrompt,
+			})
+			if err == nil {
+				predInput = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", predInput)
 			}

 			responses := make(chan schema.OpenAIResponse)
@@ -165,16 +153,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 		totalTokenUsage := backend.TokenUsage{}

 		for k, i := range config.PromptStrings {
-			if templateFile != "" {
-				// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-				templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
-					SystemPrompt: config.SystemPrompt,
-					Input:        i,
-				})
-				if err == nil {
-					i = templatedInput
-					log.Debug().Msgf("Template found, input modified to: %s", i)
-				}
+			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
+				SystemPrompt: config.SystemPrompt,
+				Input:        i,
+			})
+			if err == nil {
+				i = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", i)
 			}

 			r, tokenUsage, err := ComputeChoices(
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@@ -12,6 +12,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/schema"
 	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"

 	"github.com/rs/zerolog/log"
 )
@@ -21,7 +22,8 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/edits [post]
-func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+
 	return func(c *fiber.Ctx) error {
 		modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 		if err != nil {
@@ -35,31 +37,18 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf

 		log.Debug().Msgf("Parameter Config: %+v", config)

-		templateFile := ""
-
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
-			templateFile = config.Model
-		}
-
-		if config.TemplateConfig.Edit != "" {
-			templateFile = config.TemplateConfig.Edit
-		}
-
 		var result []schema.Choice
 		totalTokenUsage := backend.TokenUsage{}

 		for _, i := range config.InputStrings {
-			if templateFile != "" {
-				templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
-					Input:        i,
-					Instruction:  input.Instruction,
-					SystemPrompt: config.SystemPrompt,
-				})
-				if err == nil {
-					i = templatedInput
-					log.Debug().Msgf("Template found, input modified to: %s", i)
-				}
+			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{
+				Input:        i,
+				Instruction:  input.Instruction,
+				SystemPrompt: config.SystemPrompt,
+			})
+			if err == nil {
+				i = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", i)
 			}

 			r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
--- a/core/http/explorer.go
+++ b/core/http/explorer.go
@@ -7,6 +7,7 @@ import (
 	"github.com/gofiber/fiber/v2/middleware/favicon"
 	"github.com/gofiber/fiber/v2/middleware/filesystem"
 	"github.com/mudler/LocalAI/core/explorer"
+	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/http/routes"
 )

@@ -22,6 +23,7 @@ func Explorer(db *explorer.Database) *fiber.App {

 	app := fiber.New(fiberCfg)

+	app.Use(middleware.StripPathPrefix())
 	routes.RegisterExplorerRoutes(app, db)

 	httpFS := http.FS(embedDirStatic)
--- a/core/http/middleware/auth.go
+++ b/core/http/middleware/auth.go
@@ -8,6 +8,7 @@ import (
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/keyauth"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/http/utils"
 )

 // This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
@@ -39,7 +40,9 @@ func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.Er
 			if applicationConfig.OpaqueErrors {
 				return ctx.SendStatus(401)
 			}
-			return ctx.Status(401).Render("views/login", nil)
+			return ctx.Status(401).Render("views/login", fiber.Map{
+				"BaseURL": utils.BaseURL(ctx),
+			})
 		}
 		if applicationConfig.OpaqueErrors {
 			return ctx.SendStatus(500)
--- a/core/http/middleware/strippathprefix.go
+++ b/core/http/middleware/strippathprefix.go
@@ -0,0 +1,36 @@
+package middleware
+
+import (
+	"strings"
+
+	"github.com/gofiber/fiber/v2"
+)
+
+// StripPathPrefix returns a middleware that strips a path prefix from the request path.
+// Candidate prefixes are read from the X-Forwarded-Prefix request header; the first matching one is used.
+// A request for the bare prefix (without trailing slash) is redirected to the prefix ending with a slash.
+func StripPathPrefix() fiber.Handler {
+	return func(c *fiber.Ctx) error {
+		for _, prefix := range c.GetReqHeaders()["X-Forwarded-Prefix"] {
+			if prefix != "" {
+				path := c.Path()
+				pos := len(prefix) // becomes the length of the prefix without its trailing slash
+
+				if prefix[pos-1] == '/' {
+					pos--
+				} else {
+					prefix += "/" // compare with a trailing slash so "/myprefix" does not match "/myprefix-suffix/..."
+				}
+
+				if strings.HasPrefix(path, prefix) {
+					c.Path(path[pos:]) // the slash that followed the prefix becomes the new leading slash
+					break
+				} else if prefix[:pos] == path {
+					return c.Redirect(prefix) // return the redirect result instead of discarding its error
+				}
+			}
+		}
+
+		return c.Next()
+	}
+}
--- a/core/http/middleware/strippathprefix_test.go
+++ b/core/http/middleware/strippathprefix_test.go
@@ -0,0 +1,121 @@
+package middleware
+
+import (
+	"net/http/httptest"
+	"testing"
+
+	"github.com/gofiber/fiber/v2"
+	"github.com/stretchr/testify/require"
+)
+
+func TestStripPathPrefix(t *testing.T) { // table-driven test of the StripPathPrefix middleware
+	var actualPath string // path observed by whichever route handler ran; cleared before every case
+
+	app := fiber.New()
+
+	app.Use(StripPathPrefix()) // middleware is installed ahead of the two routes below
+
+	app.Get("/hello/world", func(c *fiber.Ctx) error {
+		actualPath = c.Path()
+		return nil
+	})
+
+	app.Get("/", func(c *fiber.Ctx) error {
+		actualPath = c.Path()
+		return nil
+	})
+
+	for _, tc := range []struct {
+		name         string
+		path         string // request path sent to the app
+		prefixHeader []string // X-Forwarded-Prefix values; nil leaves the header unset
+		expectStatus int
+		expectPath   string // compared with actualPath on 200 and with the Location header on 302
+	}{
+		{
+			name:         "without prefix and header",
+			path:         "/hello/world",
+			expectStatus: 200,
+			expectPath:   "/hello/world",
+		},
+		{
+			name:         "without prefix and headers on root path",
+			path:         "/",
+			expectStatus: 200,
+			expectPath:   "/",
+		},
+		{
+			name:         "without prefix but header",
+			path:         "/hello/world",
+			prefixHeader: []string{"/otherprefix/"},
+			expectStatus: 200,
+			expectPath:   "/hello/world",
+		},
+		{
+			name:         "with prefix but non-matching header",
+			path:         "/prefix/hello/world",
+			prefixHeader: []string{"/otherprefix/"},
+			expectStatus: 404,
+		},
+		{
+			name:         "with prefix and matching header",
+			path:         "/myprefix/hello/world",
+			prefixHeader: []string{"/myprefix/"},
+			expectStatus: 200,
+			expectPath:   "/hello/world",
+		},
+		{
+			name:         "with prefix and 1st header matching",
+			path:         "/myprefix/hello/world",
+			prefixHeader: []string{"/myprefix/", "/otherprefix/"},
+			expectStatus: 200,
+			expectPath:   "/hello/world",
+		},
+		{
+			name:         "with prefix and 2nd header matching",
+			path:         "/myprefix/hello/world",
+			prefixHeader: []string{"/otherprefix/", "/myprefix/"},
+			expectStatus: 200,
+			expectPath:   "/hello/world",
+		},
+		{
+			name:         "with prefix and header not ending with slash",
+			path:         "/myprefix/hello/world",
+			prefixHeader: []string{"/myprefix"},
+			expectStatus: 200,
+			expectPath:   "/hello/world",
+		},
+		{
+			name:         "with prefix and non-matching header not ending with slash",
+			path:         "/myprefix-suffix/hello/world",
+			prefixHeader: []string{"/myprefix"},
+			expectStatus: 404,
+		},
+		{
+			name:         "redirect when prefix does not end with a slash",
+			path:         "/myprefix",
+			prefixHeader: []string{"/myprefix"},
+			expectStatus: 302,
+			expectPath:   "/myprefix/",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			actualPath = "" // reset so a stale value from an earlier case cannot satisfy this one
+			req := httptest.NewRequest("GET", tc.path, nil)
+			if tc.prefixHeader != nil {
+				req.Header["X-Forwarded-Prefix"] = tc.prefixHeader // assign the slice directly so every value is sent
+			}
+
+			resp, err := app.Test(req, -1) // NOTE(review): -1 presumably disables Fiber's test timeout - confirm
+
+			require.NoError(t, err)
+			require.Equal(t, tc.expectStatus, resp.StatusCode, "response status code")
+
+			if tc.expectStatus == 200 {
+				require.Equal(t, tc.expectPath, actualPath, "rewritten path")
+			} else if tc.expectStatus == 302 {
+				require.Equal(t, tc.expectPath, resp.Header.Get("Location"), "redirect location")
+			}
+		})
+	}
+}
--- a/core/http/render.go
+++ b/core/http/render.go
@@ -10,6 +10,7 @@ import (
 	"github.com/gofiber/fiber/v2"
 	fiberhtml "github.com/gofiber/template/html/v2"
 	"github.com/microcosm-cc/bluemonday"
+	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/russross/blackfriday"
 )
@@ -26,7 +27,9 @@ func notFoundHandler(c *fiber.Ctx) error {
 		})
 	} else {
 		// The client expects an HTML response
-		return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
+		return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{
+			"BaseURL": utils.BaseURL(c),
+		})
 	}
 }

--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -11,62 +11,62 @@ import (
 	"github.com/mudler/LocalAI/pkg/model"
 )

-func RegisterLocalAIRoutes(app *fiber.App,
+func RegisterLocalAIRoutes(router *fiber.App,
 	cl *config.BackendConfigLoader,
 	ml *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
 	galleryService *services.GalleryService) {

-	app.Get("/swagger/*", swagger.HandlerDefault) // default
+	router.Get("/swagger/*", swagger.HandlerDefault) // default

 	// LocalAI API endpoints
 	if !appConfig.DisableGalleryEndpoint {
 		modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
-		app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
-		app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
+		router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
+		router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())

-		app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
-		app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
-		app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
-		app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
-		app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
-		app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
+		router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
+		router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
+		router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
+		router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
+		router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
+		router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
 	}

-	app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
-	app.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
+	router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
+	router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))

 	// Stores
 	sl := model.NewModelLoader("")
-	app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
-	app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
-	app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
-	app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
+	router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
+	router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
+	router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
+	router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))

 	if !appConfig.DisableMetrics {
-		app.Get("/metrics", localai.LocalAIMetricsEndpoint())
+		router.Get("/metrics", localai.LocalAIMetricsEndpoint())
 	}

 	// Experimental Backend Statistics Module
 	backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
-	app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
-	app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
+	router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
+	router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))

 	// p2p
 	if p2p.IsP2PEnabled() {
-		app.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
-		app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
+		router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
+		router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
 	}

-	app.Get("/version", func(c *fiber.Ctx) error {
+	router.Get("/version", func(c *fiber.Ctx) error {
 		return c.JSON(struct {
 			Version string `json:"version"`
 		}{Version: internal.PrintableVersion()})
 	})

-	app.Get("/system", localai.SystemInformations(ml, appConfig))
+	router.Get("/system", localai.SystemInformations(ml, appConfig))

 	// misc
-	app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
+	router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))

 }
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -2,84 +2,134 @@ package routes

 import (
 	"github.com/gofiber/fiber/v2"
-	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/application"
 	"github.com/mudler/LocalAI/core/http/endpoints/localai"
 	"github.com/mudler/LocalAI/core/http/endpoints/openai"
-	"github.com/mudler/LocalAI/pkg/model"
 )

 func RegisterOpenAIRoutes(app *fiber.App,
-	cl *config.BackendConfigLoader,
-	ml *model.ModelLoader,
-	appConfig *config.ApplicationConfig) {
+	application *application.Application) {
 	// openAI compatible API endpoint

 	// chat
-	app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
-	app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
+	app.Post("/v1/chat/completions",
+		openai.ChatEndpoint(
+			application.BackendLoader(),
+			application.ModelLoader(),
+			application.TemplatesEvaluator(),
+			application.ApplicationConfig(),
+		),
+	)
+
+	app.Post("/chat/completions",
+		openai.ChatEndpoint(
+			application.BackendLoader(),
+			application.ModelLoader(),
+			application.TemplatesEvaluator(),
+			application.ApplicationConfig(),
+		),
+	)

 	// edit
-	app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig))
-	app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig))
+	app.Post("/v1/edits",
+		openai.EditEndpoint(
+			application.BackendLoader(),
+			application.ModelLoader(),
+			application.TemplatesEvaluator(),
+			application.ApplicationConfig(),
+		),
+	)
+
+	app.Post("/edits",
+		openai.EditEndpoint(
+			application.BackendLoader(),
+			application.ModelLoader(),
+			application.TemplatesEvaluator(),
+			application.ApplicationConfig(),
+		),
+	)

 	// assistant
-	app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
-	app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
-	app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
-	app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
-	app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
-	app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
-	app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
-	app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
-	app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
-	app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
-	app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
-	app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
-	app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
-	app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
-	app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
-	app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
-	app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
-	app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+	app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))

 	// files
-	app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig))
-	app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig))
-	app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig))
-	app.Get("/files", openai.ListFilesEndpoint(cl, appConfig))
-	app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
-	app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
-	app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
-	app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
-	app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
-	app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
+	app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))

 	// completion
-	app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig))
-	app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig))
-	app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig))
+	app.Post("/v1/completions",
+		openai.CompletionEndpoint(
+			application.BackendLoader(),
+			application.ModelLoader(),
+			application.TemplatesEvaluator(),
+			application.ApplicationConfig(),
+		),
+	)
+
+	app.Post("/completions",
+		openai.CompletionEndpoint(
+			application.BackendLoader(),
+			application.ModelLoader(),
+			application.TemplatesEvaluator(),
+			application.ApplicationConfig(),
+		),
+	)
+
+	app.Post("/v1/engines/:model/completions",
+		openai.CompletionEndpoint(
+			application.BackendLoader(),
+			application.ModelLoader(),
+			application.TemplatesEvaluator(),
+			application.ApplicationConfig(),
+		),
+	)

 	// embeddings
-	app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
-	app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
-	app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
+	app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))

 	// audio
-	app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig))
-	app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig))
+	app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))

 	// images
-	app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig))
+	app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))

-	if appConfig.ImageDir != "" {
-		app.Static("/generated-images", appConfig.ImageDir)
+	if application.ApplicationConfig().ImageDir != "" {
+		app.Static("/generated-images", application.ApplicationConfig().ImageDir)
 	}

-	if appConfig.AudioDir != "" {
-		app.Static("/generated-audio", appConfig.AudioDir)
+	if application.ApplicationConfig().AudioDir != "" {
+		app.Static("/generated-audio", application.ApplicationConfig().AudioDir)
 	}

 	// List models
-	app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml))
-	app.Get("/models", openai.ListModelsEndpoint(cl, ml))
+	app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
+	app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
 }
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -6,20 +6,21 @@ import (
 	"sort"
 	"strings"

-	"github.com/microcosm-cc/bluemonday"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/http/elements"
 	"github.com/mudler/LocalAI/core/http/endpoints/localai"
+	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/xsync"
-	"github.com/rs/zerolog/log"

 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
+	"github.com/microcosm-cc/bluemonday"
+	"github.com/rs/zerolog/log"
 )

 type modelOpCache struct {
@@ -91,6 +92,7 @@ func RegisterUIRoutes(app *fiber.App,
 		app.Get("/p2p", func(c *fiber.Ctx) error {
 			summary := fiber.Map{
 				"Title":   "LocalAI - P2P dashboard",
+				"BaseURL": utils.BaseURL(c),
 				"Version": internal.PrintableVersion(),
 				//"Nodes":          p2p.GetAvailableNodes(""),
 				//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
@@ -149,6 +151,7 @@ func RegisterUIRoutes(app *fiber.App,

 			summary := fiber.Map{
 				"Title":            "LocalAI - Models",
+				"BaseURL":          utils.BaseURL(c),
 				"Version":          internal.PrintableVersion(),
 				"Models":           template.HTML(elements.ListModels(models, processingModels, galleryService)),
 				"Repositories":     appConfig.Galleries,
@@ -308,6 +311,7 @@ func RegisterUIRoutes(app *fiber.App,

 		summary := fiber.Map{
 			"Title":        "LocalAI - Chat with " + c.Params("model"),
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -323,11 +327,12 @@ func RegisterUIRoutes(app *fiber.App,

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect("/")
+			return c.Redirect(utils.BaseURL(c))
 		}

 		summary := fiber.Map{
 			"Title":        "LocalAI - Talk",
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0],
 			"IsP2PEnabled": p2p.IsP2PEnabled(),
@@ -344,11 +349,12 @@ func RegisterUIRoutes(app *fiber.App,

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect("/")
+			return c.Redirect(utils.BaseURL(c))
 		}

 		summary := fiber.Map{
 			"Title":        "LocalAI - Chat with " + backendConfigs[0],
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0],
 			"Version":      internal.PrintableVersion(),
@@ -364,6 +370,7 @@ func RegisterUIRoutes(app *fiber.App,

 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + c.Params("model"),
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -380,11 +387,12 @@ func RegisterUIRoutes(app *fiber.App,

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect("/")
+			return c.Redirect(utils.BaseURL(c))
 		}

 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + backendConfigs[0].Name,
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0].Name,
 			"Version":      internal.PrintableVersion(),
@@ -400,6 +408,7 @@ func RegisterUIRoutes(app *fiber.App,

 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + c.Params("model"),
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -416,11 +425,12 @@ func RegisterUIRoutes(app *fiber.App,

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect("/")
+			return c.Redirect(utils.BaseURL(c))
 		}

 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate audio with " + backendConfigs[0].Name,
+			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0].Name,
 			"IsP2PEnabled": p2p.IsP2PEnabled(),
--- a/core/http/static/assets/font1.css
+++ b/core/http/static/assets/font1.css
@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wg
  font-style: normal;
  font-weight: 400;
  font-display: swap;
-  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
+  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
 }
@font-face {
  font-family: 'Inter';
  font-style: normal;
  font-weight: 600;
  font-display: swap;
-  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
+  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
 }
@font-face {
  font-family: 'Inter';
  font-style: normal;
  font-weight: 700;
  font-display: swap;
-  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
+  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 400;
  font-display: swap;
-  src: url(/static/assets/KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
+  src: url(./KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 500;
  font-display: swap;
-  src: url(/static/assets/KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
 }
--- a/core/http/static/assets/font2.css
+++ b/core/http/static/assets/font2.css
@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap
  font-style: normal;
  font-weight: 300;
  font-display: swap;
-  src: url(/static/assets//KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 400;
  font-display: swap;
-  src: url(/static/assets//KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
+  src: url(./KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 500;
  font-display: swap;
-  src: url(/static/assets//KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 700;
  font-display: swap;
-  src: url(/static/assets//KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 900;
  font-display: swap;
-  src: url(/static/assets//KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
+  src: url(./KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
 }
--- a/core/http/static/chat.js
+++ b/core/http/static/chat.js
@@ -143,7 +143,7 @@ function readInputImage() {
    // }

    // Source: https://stackoverflow.com/a/75751803/11386095
-    const response = await fetch("/v1/chat/completions", {
+    const response = await fetch("v1/chat/completions", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${key}`,
--- a/core/http/static/image.js
+++ b/core/http/static/image.js
@@ -48,7 +48,7 @@ async function promptDallE(key, input) {
  document.getElementById("input").disabled = true;

  const model = document.getElementById("image-model").value;
-  const response = await fetch("/v1/images/generations", {
+  const response = await fetch("v1/images/generations", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${key}`,
--- a/core/http/static/talk.js
+++ b/core/http/static/talk.js
@@ -122,7 +122,7 @@ async function sendAudioToWhisper(audioBlob) {
    formData.append('model', getWhisperModel());
    API_KEY = localStorage.getItem("key");

-    const response = await fetch('/v1/audio/transcriptions', {
+    const response = await fetch('v1/audio/transcriptions', {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${API_KEY}`
@@ -139,7 +139,7 @@ async function sendTextToChatGPT(text) {
    conversationHistory.push({ role: "user", content: text });
    API_KEY = localStorage.getItem("key");

-    const response = await fetch('/v1/chat/completions', {
+    const response = await fetch('v1/chat/completions', {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${API_KEY}`,
@@ -163,7 +163,7 @@ async function sendTextToChatGPT(text) {
 async function getTextToSpeechAudio(text) {
    API_KEY = localStorage.getItem("key");

-    const response = await fetch('/v1/audio/speech', {
+    const response = await fetch('v1/audio/speech', {
        
        method: 'POST',
        headers: {
--- a/core/http/static/tts.js
+++ b/core/http/static/tts.js
@@ -19,7 +19,7 @@ async function tts(key, input) {
  document.getElementById("input").disabled = true;

  const model = document.getElementById("tts-model").value;
-  const response = await fetch("/tts", {
+  const response = await fetch("tts", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${key}`,
--- a/core/http/utils/baseurl.go
+++ b/core/http/utils/baseurl.go
@@ -0,0 +1,24 @@
+package utils
+
+import (
+	"strings"
+
+	"github.com/gofiber/fiber/v2"
+)
+
+// BaseURL returns the base URL for the given HTTP request context, taking into
+// account that a reverse-proxy may expose the app under a different protocol,
+// host and path. The returned URL is guaranteed to end with `/`.
+// The method should be used in conjunction with the StripPathPrefix middleware.
+func BaseURL(c *fiber.Ctx) string {
+	path, origPath := c.Path(), c.OriginalURL()
+	// OriginalURL keeps the query string, which would defeat the suffix match below.
+	if i := strings.IndexByte(origPath, '?'); i >= 0 {
+		origPath = origPath[:i]
+	}
+	// The leading `/` check keeps the +1 slice in range and the prefix `/`-terminated.
+	if strings.HasPrefix(path, "/") && path != origPath && strings.HasSuffix(origPath, path) {
+		return c.BaseURL() + origPath[:len(origPath)-len(path)+1]
+	}
+	return c.BaseURL() + "/"
+}
--- a/core/http/utils/baseurl_test.go
+++ b/core/http/utils/baseurl_test.go
@@ -0,0 +1,48 @@
+package utils
+
+import (
+	"net/http/httptest"
+	"testing"
+
+	"github.com/gofiber/fiber/v2"
+	"github.com/stretchr/testify/require"
+)
+
+func TestBaseURL(t *testing.T) {
+	for _, tc := range []struct {
+		name      string
+		prefix    string
+		expectURL string
+	}{
+		{
+			name:      "without prefix",
+			prefix:    "/",
+			expectURL: "http://example.com/",
+		},
+		{
+			name:      "with prefix",
+			prefix:    "/myprefix/",
+			expectURL: "http://example.com/myprefix/",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			app := fiber.New()
+			actualURL := ""
+
+			app.Get(tc.prefix+"hello/world", func(c *fiber.Ctx) error {
+				if tc.prefix != "/" {
+					c.Path("/hello/world")
+				}
+				actualURL = BaseURL(c)
+				return nil
+			})
+
+			req := httptest.NewRequest("GET", tc.prefix+"hello/world", nil)
+			resp, err := app.Test(req, -1)
+
+			require.NoError(t, err)
+			require.Equal(t, 200, resp.StatusCode, "response status code")
+			require.Equal(t, tc.expectURL, actualURL, "base URL")
+		})
+	}
+}
--- a/core/http/views/404.html
+++ b/core/http/views/404.html
@@ -12,7 +12,7 @@
        <div class="header text-center py-12">
            <h1 class="text-5xl font-bold">Welcome to your LocalAI instance!</h1>
            <div class="mt-6">
-         <!--       <a href="/" aria-label="HomePage" alt="HomePage">           
+         <!--       <a href="./" aria-label="HomePage" alt="HomePage">
                    <img class="mx-auto w-1/4 h-auto" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo">            
                </a>
            -->
--- a/core/http/views/chat.html
+++ b/core/http/views/chat.html
@@ -28,7 +28,7 @@ SOFTWARE.
 <!doctype html>
 <html lang="en">
  {{template "views/partials/head" .}}
-  <script defer src="/static/chat.js"></script>
+  <script defer src="static/chat.js"></script>
  <style>
    body {
        overflow: hidden; 
@@ -101,9 +101,9 @@ SOFTWARE.
        {{ $model:=.Model}}
        {{ range .ModelsConfig }}
        {{ if eq . $model }}
-        <option value="/chat/{{.}}" selected  class="bg-gray-700 text-white">{{.}}</option>
+        <option value="chat/{{.}}" selected  class="bg-gray-700 text-white">{{.}}</option>
        {{ else }}
-        <option value="/chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
+        <option value="chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
        {{ end }}
        {{ end }}
      </select>
@@ -142,7 +142,7 @@ SOFTWARE.
      <div id="loader" class="my-2 loader" style="display: none;"></div>
      <input id="chat-model" type="hidden" value="{{.Model}}">
      <input id="input_image" type="file" style="display: none;" @change="fileName = $event.target.files[0].name">
-      <form id="prompt" action="/chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
+      <form id="prompt" action="chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
          <div class="relative w-full">
              <textarea
                  id="input"
--- a/core/http/views/explorer.html
+++ b/core/http/views/explorer.html
@@ -370,7 +370,7 @@
                }
            }
        </script>
-        <script src="/static/p2panimation.js"></script>
+        <script src="static/p2panimation.js"></script>

        {{template "views/partials/footer" .}}
    </div>
--- a/core/http/views/index.html
+++ b/core/http/views/index.html
@@ -20,7 +20,7 @@
            {{template "views/partials/inprogress" .}}
            {{ if eq (len .ModelsConfig) 0 }}
            <h2 class="text-center text-3xl font-semibold text-gray-100"> <i class="text-yellow-200 ml-2 fa-solid fa-triangle-exclamation animate-pulse"></i> Ouch! seems you don't have any models installed from the LocalAI gallery!</h2>
-            <p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="/browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>
+            <p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>

            {{ if ne (len .Models) 0 }}
            <hr class="my-4">
@@ -66,7 +66,7 @@
                        {{ end }}
                    </td>
                    <td class="px-4 py-3 font-bold">
-                        <p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="/browse?term={{.Name}}">{{.Name}}</a></p>
+                        <p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="browse?term={{.Name}}">{{.Name}}</a></p>
                    </td>
                    <td class="px-4 py-3 font-bold">
                        {{ if .Backend }}
@@ -84,7 +84,7 @@
                    <td class="px-4 py-3">
                        <button
                            class="float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
-                            data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="/browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
+                            data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
                    </td>
                {{ end }}
                {{ range .Models }}
--- a/core/http/views/login.html
+++ b/core/http/views/login.html
@@ -4,6 +4,8 @@
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Open Authenticated Website</title>
+    <base href="{{.BaseURL}}" />
+    <link rel="icon" type="image/x-icon" href="favicon.ico" />
 </head>
 <body>
    <h1>Authorization is required</h1>
--- a/core/http/views/models.html
+++ b/core/http/views/models.html
@@ -16,38 +16,38 @@

            <div class="text-center font-semibold text-gray-100">
                <h2>Filter by type:</h2>
-                <button  hx-post="/browse/search/models"
+                <button  hx-post="browse/search/models"
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "tts"}'
                hx-indicator=".htmx-indicator" >TTS</button> 
-                <button  hx-post="/browse/search/models" 
+                <button  hx-post="browse/search/models" 
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "stablediffusion"}'
                hx-indicator=".htmx-indicator" >Image generation</button> 
-                <button  hx-post="/browse/search/models" \
+                <button  hx-post="browse/search/models"
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "llm"}'
                hx-indicator=".htmx-indicator" >Text generation</button> 
-                <button  hx-post="/browse/search/models" 
+                <button  hx-post="browse/search/models" 
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "multimodal"}'
                hx-indicator=".htmx-indicator" >Multimodal</button> 
-                <button  hx-post="/browse/search/models" 
+                <button  hx-post="browse/search/models" 
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "embedding"}'
                hx-indicator=".htmx-indicator" >Embeddings</button>
-                <button  hx-post="/browse/search/models"
+                <button  hx-post="browse/search/models"
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "rerank"}'
                hx-indicator=".htmx-indicator" >Rerankers</button> 
                <button  
-                    hx-post="/browse/search/models"
+                    hx-post="browse/search/models"
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "whisper"}'
@@ -57,7 +57,7 @@
            <div class="text-center text-xs font-semibold text-gray-100">
                Filter by tags:
                {{ range .AllTags }}
-                    <button  hx-post="/browse/search/models" class="text-blue-500" hx-target="#search-results" 
+                    <button  hx-post="browse/search/models" class="text-blue-500" hx-target="#search-results" 
                    hx-vals='{"search": "{{.}}"}'
                    hx-indicator=".htmx-indicator" >{{.}}</button> 
                {{ end }}
@@ -69,7 +69,7 @@

            <input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search" 
                name="search" placeholder="Begin Typing To Search models..." 
-                hx-post="/browse/search/models" 
+                hx-post="browse/search/models" 
                hx-trigger="input changed delay:500ms, search" 
                hx-target="#search-results" 
                hx-indicator=".htmx-indicator">
--- a/core/http/views/p2p.html
+++ b/core/http/views/p2p.html
@@ -48,11 +48,11 @@
            <!-- Federation Box -->
            <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">

-                <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="/p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p>
+                <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p>
                <p class="mb-4">You can start LocalAI in federated mode to share your instance, or start the federated server to balance requests between nodes of the federation.</p>

                <div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
-                    <div hx-get="/p2p/ui/workers-federation" hx-trigger="every 1s"></div>
+                    <div hx-get="p2p/ui/workers-federation" hx-trigger="every 1s"></div>
                </div>

                <hr class="border-gray-700 mb-12">
@@ -123,11 +123,11 @@

            <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">

-                <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="/p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p>
+                <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p>
                <p class="mb-4">You can start llama.cpp workers to distribute weights between the workers and offload part of the computation. To start a new worker, you can use the CLI or Docker.</p>

                <div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
-                    <div hx-get="/p2p/ui/workers" hx-trigger="every 1s"></div>
+                    <div hx-get="p2p/ui/workers" hx-trigger="every 1s"></div>
                </div>
                <hr class="border-gray-700 mb-12">

@@ -177,7 +177,7 @@

    {{template "views/partials/footer" .}}
 </div>
-<script src="/static/p2panimation.js"></script>
+<script src="static/p2panimation.js"></script>
 <style>
    .token {
        word-break: break-all;
--- a/core/http/views/partials/footer.html
+++ b/core/http/views/partials/footer.html
@@ -2,4 +2,4 @@
    LocalAI Version {{.Version}}<br>
    <a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
 </footer>
-<script src="/static/assets/tw-elements.js"></script>
+<script src="static/assets/tw-elements.js"></script>
--- a/core/http/views/partials/head.html
+++ b/core/http/views/partials/head.html
@@ -2,33 +2,35 @@
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{.Title}}</title>
+    <base href="{{.BaseURL}}" />
+    <link rel="icon" type="image/x-icon" href="favicon.ico" />
    <link
    rel="stylesheet"
-    href="/static/assets/highlightjs.css"
+    href="static/assets/highlightjs.css"
  />
-  <script defer src="/static/assets/highlightjs.js"></script>
+  <script defer src="static/assets/highlightjs.js"></script>
    <script
    defer
-    src="/static/assets/alpine.js"
+    src="static/assets/alpine.js"
  ></script>
  <script
    defer
-    src="/static/assets/marked.js"
+    src="static/assets/marked.js"
  ></script>
  <script
    defer
-    src="/static/assets/purify.js"
+    src="static/assets/purify.js"
  ></script>

-  <link href="/static/general.css" rel="stylesheet" />
-    <link href="/static/assets/font1.css" rel="stylesheet">
+  <link href="static/general.css" rel="stylesheet" />
+    <link href="static/assets/font1.css" rel="stylesheet">
    <link
-    href="/static/assets/font2.css"
+    href="static/assets/font2.css"
    rel="stylesheet" />
  <link
    rel="stylesheet"
-    href="/static/assets/tw-elements.css" />
-  <script src="/static/assets/tailwindcss.js"></script>
+    href="static/assets/tw-elements.css" />
+  <script src="static/assets/tailwindcss.js"></script>
  <script>
    tailwind.config = {
      darkMode: "class",
@@ -54,11 +56,11 @@
      });
    }
  </script>
-  <link href="/static/assets/fontawesome/css/fontawesome.css" rel="stylesheet" />
-  <link href="/static/assets/fontawesome/css/brands.css" rel="stylesheet" />
-  <link href="/static/assets/fontawesome/css/solid.css" rel="stylesheet" />
-  <script src="/static/assets/flowbite.min.js"></script>
-  <script src="/static/assets/htmx.js" crossorigin="anonymous"></script>
+  <link href="static/assets/fontawesome/css/fontawesome.css" rel="stylesheet" />
+  <link href="static/assets/fontawesome/css/brands.css" rel="stylesheet" />
+  <link href="static/assets/fontawesome/css/solid.css" rel="stylesheet" />
+  <script src="static/assets/flowbite.min.js"></script>
+  <script src="static/assets/htmx.js" crossorigin="anonymous"></script>
  <!-- P2P Animation START -->
  <style>
    .animation-container {
--- a/core/http/views/partials/inprogress.html
+++ b/core/http/views/partials/inprogress.html
@@ -17,13 +17,13 @@

      <div class="flex items-center justify-between bg-slate-600 p-2 mb-2 rounded-md">
         <div class="flex items center">
-             <span class="text-gray-300"><a href="/browse?term={{$parts._1}}"
+             <span class="text-gray-300"><a href="browse?term={{$parts._1}}"
                 class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                 >{{$modelName}}</a> {{if $repository}} (from the '{{$repository}}' repository) {{end}}</span>
         </div>
-         <div hx-get="/browse/job/{{$value}}" hx-swap="outerHTML" hx-target="this" hx-trigger="done">
+         <div hx-get="browse/job/{{$value}}" hx-swap="outerHTML" hx-target="this" hx-trigger="done">
             <h3 role="status" id="pblabel" >{{$op}}
-                 <div hx-get="/browse/job/progress/{{$value}}" hx-trigger="every 600ms" 
+                 <div hx-get="browse/job/progress/{{$value}}" hx-trigger="every 600ms" 
                 hx-target="this"
                 hx-swap="innerHTML"  ></div></h3>
         </div>     
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -3,8 +3,8 @@
        <div class="flex items-center justify-between">
            <div class="flex items-center">
                <!-- Logo Image: Replace 'logo_url_here' with your actual logo URL -->
-                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
-                <a href="/" class="text-white text-xl font-bold">LocalAI</a>
+                <a href="./" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
+                <a href="./" class="text-white text-xl font-bold">LocalAI</a>
            </div>
            <!-- Menu button for small screens -->
            <div class="lg:hidden">
@@ -14,33 +14,33 @@
            </div>
            <!-- Navigation links -->
            <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
-                <a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
+                <a href="./" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
                <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-                <a href="/browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
-                <a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
-                <a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
-                <a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
-                <a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
+                <a href="browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
+                <a href="chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
+                <a href="text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
+                <a href="tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
+                <a href="talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
                {{ if .IsP2PEnabled }}
-                <a href="/p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
+                <a href="p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
                {{ end }}
-                <a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
+                <a href="swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
            </div>
        </div>
        <!-- Collapsible menu for small screens -->
        <div class="hidden lg:hidden" id="mobile-menu">
            <div class="pt-4 pb-3 border-t border-gray-700">
-                <a href="/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
+                <a href="./" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
                <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-                <a href="/browse/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-brain pr-2"></i> Models</a>
-                <a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
-                <a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
-                <a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
-                <a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
+                <a href="browse/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-brain pr-2"></i> Models</a>
+                <a href="chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
+                <a href="text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
+                <a href="tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
+                <a href="talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
                {{ if .IsP2PEnabled }}
-                <a href="/p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
+                <a href="p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
                {{ end }}
-                <a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
+                <a href="swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
            </div>
        </div>
    </div>
--- a/core/http/views/partials/navbar_explorer.html
+++ b/core/http/views/partials/navbar_explorer.html
@@ -3,8 +3,8 @@
        <div class="flex items-center justify-between">
            <div class="flex items-center">
                <!-- Logo Image: Replace 'logo_url_here' with your actual logo URL -->
-                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
-                <a href="/" class="text-white text-xl font-bold">LocalAI</a>
+                <a href="./" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
+                <a href="./" class="text-white text-xl font-bold">LocalAI</a>
            </div>
            <!-- Menu button for small screens -->
            <div class="lg:hidden">
@@ -14,7 +14,7 @@
            </div>
            <!-- Navigation links -->
            <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
-                <a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
+                <a href="./" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
                <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
                <a href="https://models.localai.io/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
            </div>
@@ -22,7 +22,7 @@
        <!-- Collapsible menu for small screens -->
        <div class="hidden lg:hidden" id="mobile-menu">
            <div class="pt-4 pb-3 border-t border-gray-700">
-                <a href="/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
+                <a href="./" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
                <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
                <a href="https://models.localai.io/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
            </div>
--- a/core/http/views/talk.html
+++ b/core/http/views/talk.html
@@ -1,7 +1,7 @@
 <!doctype html>
 <html lang="en">
  {{template "views/partials/head" .}}
-  <script defer src="/static/talk.js"></script>
+  <script defer src="static/talk.js"></script>
  <style>
    body {
        overflow: hidden; 
--- a/core/http/views/text2image.html
+++ b/core/http/views/text2image.html
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html lang="en">
 {{template "views/partials/head" .}}
-<script defer src="/static/image.js"></script>
+<script defer src="static/image.js"></script>

 <body class="bg-gray-900 text-gray-200">
 <div class="flex flex-col min-h-screen">
@@ -50,9 +50,9 @@
                {{ $model:=.Model}}
                {{ range .ModelsConfig }}
                {{ if eq .Name $model }}
-                <option value="/text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
+                <option value="text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
                {{ else }}
-                <option value="/text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
+                <option value="text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
                {{ end }}
                {{ end }}
              </select>
@@ -62,7 +62,7 @@

            <div class="mt-12">
              <input id="image-model" type="hidden" value="{{.Model}}">
-              <form id="genimage" action="/text2image/{{.Model}}" method="get">
+              <form id="genimage" action="text2image/{{.Model}}" method="get">
                <input
                  type="text"
                  id="input"
--- a/core/http/views/tts.html
+++ b/core/http/views/tts.html
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html lang="en">
 {{template "views/partials/head" .}}
-<script defer src="/static/tts.js"></script>
+<script defer src="static/tts.js"></script>

 <body class="bg-gray-900 text-gray-200">
 <div class="flex flex-col min-h-screen">
@@ -47,9 +47,9 @@
                {{ $model:=.Model}}
                {{ range .ModelsConfig }}
                {{ if eq .Name $model }}
-                <option value="/tts/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
+                <option value="tts/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
                {{ else }}
-                <option value="/tts/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
+                <option value="tts/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
                {{ end }}
                {{ end }}
              </select>
@@ -59,7 +59,7 @@

            <div class="mt-12">
              <input id="tts-model" type="hidden" value="{{.Model}}">
-              <form id="tts" action="/tts/{{.Model}}" method="get">
+              <form id="tts" action="tts/{{.Model}}" method="get">
                <input
                  type="text"
                  id="input"
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -129,6 +129,9 @@ There are options that can be tweaked or parameters that can be set using enviro

 | Environment Variable | Description |
 |----------------------|-------------|
+| **LOCALAI_P2P** | Set to "true" to enable p2p |
+| **LOCALAI_FEDERATED** | Set to "true" to enable federated mode |
+| **FEDERATED_SERVER** | Set to "true" to enable federated server |
 | **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
 | **LOCALAI_P2P_ENABLE_LIMITS** | Set to "true" to enable connection limits and resources management (useful when running with poor connectivity or want to limit resources consumption) |
 | **LOCALAI_P2P_LISTEN_MADDRS** | Set to comma separated list of multiaddresses to override default libp2p 0.0.0.0 multiaddresses |
--- a/docs/content/docs/getting-started/kubernetes.md
+++ b/docs/content/docs/getting-started/kubernetes.md
@@ -10,13 +10,13 @@ ico = "rocket_launch"
 For installing LocalAI in Kubernetes, the deployment file from the `examples` can be used and customized as preferred:

 ```
-kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment.yaml
+kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/heads/main/kubernetes/deployment.yaml
 ```

 For Nvidia GPUs:

 ```
-kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment-nvidia.yaml
+kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/heads/main/kubernetes/deployment-nvidia.yaml
 ```

 Alternatively, the [helm chart](https://github.com/go-skynet/helm-charts) can be used as well:
--- a/docs/content/docs/reference/compatibility-table.md
+++ b/docs/content/docs/reference/compatibility-table.md
@@ -6,7 +6,7 @@ weight = 24
 url = "/model-compatibility/"
 +++

-Besides llama based models, LocalAI is compatible also with other architectures. The table below lists all the compatible models families and the associated binding repository.
+Besides llama-based models, LocalAI is also compatible with other architectures. The table below lists all the backends, the compatible model families, and the associated repositories.

 {{% alert note %}}

@@ -16,19 +16,8 @@ LocalAI will attempt to automatically load models which are not explicitly confi

 | Backend and Bindings                                                             | Compatible models     | Completion/Chat endpoint | Capability | Embeddings support                | Token stream support | Acceleration |
 |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
-| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}})        | Vicuna, Alpaca, LLaMa, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes                      | GPT and Functions                        | yes** | yes                  | CUDA, openCL, cuBLAS, Metal |
-| [gpt4all-llama](https://github.com/nomic-ai/gpt4all)      | Vicuna, Alpaca, LLaMa | yes                      | GPT                        | no                                | yes                  | N/A  |
-| [gpt4all-mpt](https://github.com/nomic-ai/gpt4all)          | MPT                   | yes                      | GPT                        | no                                | yes                  | N/A  |
-| [gpt4all-j](https://github.com/nomic-ai/gpt4all)           | GPT4ALL-J             | yes                      | GPT                        | no                                | yes                  | N/A  |
-| [falcon-ggml](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp))        | Falcon (*)             | yes                      | GPT                        | no                                | no                   | N/A |
-| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp))            | Dolly                 | yes                      | GPT                        | no                                | no                   | N/A |
-| [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp))        | GPTJ             | yes                      | GPT                        | no                                | no                   | N/A |
-| [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp))         | MPT     | yes                      | GPT                        | no                                | no                   | N/A |
-| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp))        | Replit             | yes                      | GPT                        | no                                | no                   | N/A |
-| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp))        | GPT NeoX, RedPajama, StableLM             | yes                      | GPT                        | no                                | no                   | N/A |
-| [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp))       | Bloom                 | yes                      | GPT                        | no                                | no                   | N/A |
-| [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rwkv.cpp))       | rwkv                 | yes                      | GPT                        | no                                | yes                   | N/A  |
-| [bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp)) | bert                  | no                       | Embeddings only                  | yes                               | no                   | N/A |
+| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}})        | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes                      | GPT and Functions                        | yes | yes                  | CUDA, openCL, cuBLAS, Metal |
+| [llama.cpp's ggml model (backward compatibility with old format, before GGUF)](https://github.com/ggerganov/llama.cpp) ([binding](https://github.com/go-skynet/go-llama.cpp))  | LLama, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes                      | GPT and Functions                        | yes | yes                  | CUDA, openCL, cuBLAS, Metal |
 | [whisper](https://github.com/ggerganov/whisper.cpp)         | whisper               | no                       | Audio                 | no                                | no                   | N/A |
 | [stablediffusion](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion))        | stablediffusion               | no                       | Image                 | no                                | no                   | N/A |
 | [langchain-huggingface](https://github.com/tmc/langchaingo)                                                                    | Any text generators available on HuggingFace through API | yes                      | GPT                        | no                                | no                   | N/A |
@@ -40,15 +29,19 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | `diffusers`  | SD,...                   | no                       | Image generation    | no                               | no                   | N/A |
 | `vall-e-x` | Vall-E    | no                       | Audio generation and Voice cloning    | no                               | no                   | CPU/CUDA |
 | `vllm` | Various GPTs and quantization formats | yes                      | GPT             | no | no                  | CPU/CUDA |
+| `mamba` | Mamba models architecture | yes                      | GPT             | no | no                  | CPU/CUDA |
 | `exllama2`  | GPTQ                   | yes                       | GPT only                  | no                               | no                   | N/A |
 | `transformers-musicgen`  |                    | no                       | Audio generation                | no                               | no                   | N/A |
 | [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream)         | stablediffusion               | no                       | Image                 | no                                | no                   | N/A |
 | `coqui` | Coqui    | no                       | Audio generation and Voice cloning    | no                               | no                   | CPU/CUDA |
-| `transformers` | Various GPTs and quantization formats | yes                      | GPT, embeddings            | yes | yes****                  | CPU/CUDA/XPU |
+| `openvoice` | Open voice    | no                       | Audio generation and Voice cloning    | no                               | no                   | CPU/CUDA |
+| `parler-tts` | Parler-TTS    | no                       | Audio generation and Voice cloning    | no                               | no                   | CPU/CUDA |
+| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API    | no                       | Reranking   | no                               | no                   | CPU/CUDA |
+| `transformers` | Various GPTs and quantization formats | yes                      | GPT, embeddings            | yes | yes*                  | CPU/CUDA/XPU |
+| [bark-cpp](https://github.com/PABannier/bark.cpp)        | bark               | no                       | Audio-Only                 | no                                | no                   | yes |
+| [stablediffusion-cpp](https://github.com/leejet/stable-diffusion.cpp)         | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker               | no                       | Image                 | no                                | no                   | N/A |
+| [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD    | no                       | Voice Activity Detection    | no                               | no                   | CPU |

 Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})).

- \* 7b ONLY
- ** doesn't seem to be accurate
- *** 7b and 40b with the `ggccv` format, for instance: https://huggingface.co/TheBloke/WizardLM-Uncensored-Falcon-40B-GGML
- **** Only for CUDA and OpenVINO CPU/XPU acceleration.
+- \* Only for CUDA and OpenVINO CPU/XPU acceleration.
--- a/docs/content/docs/reference/nvidia-l4t.md
+++ b/docs/content/docs/reference/nvidia-l4t.md
@@ -0,0 +1,35 @@
+
+++
+disableToc = false
+title = "Running on Nvidia ARM64"
+weight = 27
+++
+
+LocalAI can be run on Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. The following instructions will guide you through building the LocalAI container for Nvidia ARM64 devices.
+
+## Prerequisites
+
+- Docker engine installed (https://docs.docker.com/engine/install/ubuntu/)
+- Nvidia container toolkit installed (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt)
+
+## Build the container
+
+Build the LocalAI container for Nvidia ARM64 devices using the following command:
+
+```bash
+git clone https://github.com/mudler/LocalAI
+
+cd LocalAI
+
+docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core -t localai-orin .
+```
+
+## Usage
+
+Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:
+
+```bash
+docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models  -ti --restart=always --name local-ai --runtime nvidia --gpus all localai-orin
+```
+
+Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-  "version": "v2.23.0"
+  "version": "v2.24.2"
 }
--- a/docs/themes/hugo-theme-relearn
+++ b/docs/themes/hugo-theme-relearn
--- a/Show More
+++ b/Show More