chore: ⬆️ Update ggerganov/llama.cpp to 26a8406ba9198eb6fdd8329fa717555b4f77f05f (#4358)

chore: ⬆️ Update ggerganov/llama.cpp to `26a8406ba9198eb6fdd8329fa717555b4f77f05f` (#4353) ⬆️ Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: LocalAI [bot] <139863280+localai-bot@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Revert "feat: include tokens usage for streamed output (#4282)"
2026-02-03 19:22:39 -05:00 · 2024-12-10 15:51:45 +01:00 · 2024-12-08 16:31:48 +01:00
129 changed files with 1268 additions and 5711 deletions
--- a/.env
+++ b/.env
@@ -82,15 +82,6 @@
 # Enable to allow p2p mode
 # LOCALAI_P2P=true

-# Enable to use federated mode
-# LOCALAI_FEDERATED=true
-
-# Enable to start federation server
-# FEDERATED_SERVER=true
-
-# Define to use federation token
-# TOKEN=""
-
 ### Watchdog settings
 ###
 # Enables watchdog to kill backends that are inactive for too much time
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -5,10 +5,6 @@ dependencies:
 - any:
  - changed-files:
    - any-glob-to-any-file: 'Makefile'
-  - changed-files:
-    - any-glob-to-any-file: '*.mod'
-  - changed-files:
-    - any-glob-to-any-file: '*.sum'

 kind/documentation:
 - any:
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -280,7 +280,6 @@ jobs:
      makeflags: ${{ matrix.makeflags }}
      latest-image: ${{ matrix.latest-image }}
      latest-image-aio: ${{ matrix.latest-image-aio }}
-      skip-drivers: ${{ matrix.skip-drivers }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -302,7 +301,6 @@ jobs:
            latest-image: 'latest-cpu'
            latest-image-aio: 'latest-aio-cpu'
            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -314,7 +312,6 @@ jobs:
            base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
@@ -326,7 +323,6 @@ jobs:
            base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -338,7 +334,6 @@ jobs:
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"
@@ -349,7 +344,6 @@ jobs:
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'
@@ -360,45 +354,4 @@ jobs:
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
-#  parallel-builds:
-#    uses: ./.github/workflows/image_build.yml
-#    with:
-#      tag-latest: ${{ matrix.tag-latest }}
-#      tag-suffix: ${{ matrix.tag-suffix }}
-#      ffmpeg: ${{ matrix.ffmpeg }}
-#      image-type: ${{ matrix.image-type }}
-#      build-type: ${{ matrix.build-type }}
-#      cuda-major-version: ${{ matrix.cuda-major-version }}
-#      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-#      platforms: ${{ matrix.platforms }}
-#      runs-on: ${{ matrix.runs-on }}
-#      aio: ${{ matrix.aio }}
-#      base-image: ${{ matrix.base-image }}
-#      grpc-base-image: ${{ matrix.grpc-base-image }}
-#      makeflags: ${{ matrix.makeflags }}
-#      latest-image: ${{ matrix.latest-image }}
-#      latest-image-aio: ${{ matrix.latest-image-aio }}
-#      skip-drivers: ${{ matrix.skip-drivers }}
-#    secrets:
-#      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-#      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-#      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-#      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-#    strategy:
-#      matrix:
-#        include:
-#          - build-type: 'cublas'
-#            cuda-major-version: "12"
-#            cuda-minor-version: "0"
-#            platforms: 'linux/arm64'
-#            tag-latest: 'false'
-#            tag-suffix: '-nvidia-l4t-arm64-core'
-#            latest-image: 'latest-nvidia-l4t-arm64-core'
-#            ffmpeg: 'true'
-#            image-type: 'core'
-#            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-#            runs-on: 'self-hosted'
-#            makeflags: "--jobs=4 --output-sync=target"
-#            skip-drivers: 'true'
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -49,10 +49,6 @@ on:
        description: 'FFMPEG'
        default: ''
        type: string
-      skip-drivers:
-        description: 'Skip drivers by default'
-        default: 'false'
-        type: string
      image-type:
        description: 'Image type'
        default: ''
@@ -238,7 +234,6 @@ jobs:
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
-            SKIP_DRIVERS=${{ inputs.skip-drivers }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
@@ -267,7 +262,6 @@ jobs:
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
-            SKIP_DRIVERS=${{ inputs.skip-drivers }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -18,7 +18,7 @@ jobs:
        if: ${{ github.actor != 'dependabot[bot]' }}
      - name: Run Gosec Security Scanner
        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@v2.22.0
+        uses: securego/gosec@v2.21.4
        with:
          # we let the report trigger content trigger a failure using the GitHub Security features.
          args: '-no-fail -fmt sarif -out results.sarif ./...'
--- a/Dockerfile
+++ b/Dockerfile
@@ -115,13 +115,12 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
 ARG CUDA_MINOR_VERSION=0
-ARG SKIP_DRIVERS=false

 ENV BUILD_TYPE=${BUILD_TYPE}

 # Vulkan requirements
 RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
+    if [ "${BUILD_TYPE}" = "vulkan" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
            software-properties-common pciutils wget gpg-agent && \
@@ -137,7 +136,7 @@ EOT

 # CuBLAS requirements
 RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
+    if [ "${BUILD_TYPE}" = "cublas" ]; then
        apt-get update && \
        apt-get install -y  --no-install-recommends \
            software-properties-common pciutils
@@ -163,7 +162,7 @@ RUN <<EOT bash
 EOT

 # If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
+RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            libclblast-dev && \
@@ -171,7 +170,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
        rm -rf /var/lib/apt/lists/* \
    ; fi

-RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
+RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            hipblas-dev \
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=504af20ee4eae72080a56d59d744f6774f7901ce
+CPPLLAMA_VERSION?=26a8406ba9198eb6fdd8329fa717555b4f77f05f

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -32,7 +32,7 @@ BARKCPP_VERSION?=v1.0.0

 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a
+STABLEDIFFUSION_GGML_VERSION?=4570715727f35e5a07a76796d823824c8f42206c

 ONNX_VERSION?=1.20.0
 ONNX_ARCH?=x64
@@ -302,8 +302,14 @@ sources/stablediffusion-ggml.cpp:
 	git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
 	git submodule update --init --recursive --depth 1 --single-branch

-backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
-	$(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a
+sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
+	cd sources/stablediffusion-ggml.cpp && \
+	mkdir -p build && \
+	cd build && \
+	cmake $(CMAKE_ARGS) .. && \
+	cmake --build . --config Release
+
+backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
 	$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a

 backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
--- a/README.md
+++ b/README.md
@@ -126,10 +126,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl

 ## 🚀 [Features](https://localai.io/features/)

-- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
+- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
 - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
-- 🎨 [Image generation](https://localai.io/features/image-generation)
+- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
 - 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/) 
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
@@ -137,7 +137,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
 - 📈 [Reranker API](https://localai.io/features/reranker/)
 - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
-- 🔊 Voice activity detection (Silero-VAD support)
 - 🌍 Integrated WebUI!

 ## 💻 Usage
@@ -160,7 +159,6 @@ Model galleries
 Other:
 - Helm chart https://github.com/go-skynet/helm-charts
 - VSCode extension https://github.com/badgooooor/localai-vscode-plugin
-- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
 - Terminal utility https://github.com/djcopley/ShellOracle
 - Local Smart assistant https://github.com/mudler/LocalAGI
 - Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -159,7 +159,6 @@ message Reply {
  bytes message = 1;
  int32 tokens = 2;
  int32 prompt_tokens = 3;
-  bytes audio = 5;
 }

 message ModelOptions {
@@ -243,9 +242,6 @@ message ModelOptions {
  repeated float LoraScales = 61;

  repeated string Options = 62;
-
-  string CacheTypeKey = 63;
-  string CacheTypeValue = 64;
 }

 message Result {
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -428,7 +428,6 @@ struct llama_server_context
 {
    llama_model *model = nullptr;
    llama_context *ctx = nullptr;
-    const llama_vocab * vocab = nullptr;

    clip_ctx *clp_ctx = nullptr;

@@ -440,7 +439,6 @@ struct llama_server_context
    bool clean_kv_cache     = true;
    bool all_slots_are_idle = false;
    bool add_bos_token      = true;
-    bool has_eos_token      = true;

    int32_t n_ctx;  // total context for all clients / slots

@@ -494,8 +492,8 @@ struct llama_server_context
        }

        common_init_result common_init = common_init_from_params(params);
-        model = common_init.model.release();
-        ctx = common_init.context.release();
+        model = common_init.model;
+        ctx = common_init.context;
        if (model == nullptr)
        {
            LOG_ERR("unable to load model: %s", params.model.c_str());
@@ -504,7 +502,7 @@ struct llama_server_context

        if (multimodal) {
            const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
-            const int n_embd_llm  = llama_model_n_embd(model);
+            const int n_embd_llm  = llama_n_embd(model);
            if (n_embd_clip != n_embd_llm) {
                LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
                llama_free(ctx);
@@ -513,15 +511,23 @@ struct llama_server_context
            }
        }

-        vocab = llama_model_get_vocab(model);
        n_ctx = llama_n_ctx(ctx);

-        add_bos_token = llama_vocab_get_add_bos(vocab);
-        has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
+        add_bos_token = llama_add_bos_token(model);

        return true;
    }

+    void validate_model_chat_template(server_params & sparams) {
+        llama_chat_message chat[] = {{"user", "test"}};
+        std::vector<char> buf(1);
+        int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
+        if (res < 0) {
+            LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
+            sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
+        }
+    }
+
    llama_client_slot* get_active_slot() {
        for (llama_client_slot& slot : slots) {
            // Check if the slot is currently processing
@@ -675,6 +681,7 @@ struct llama_server_context
        slot->sparams.mirostat          = json_value(data, "mirostat",          default_sparams.mirostat);
        slot->sparams.mirostat_tau      = json_value(data, "mirostat_tau",      default_sparams.mirostat_tau);
        slot->sparams.mirostat_eta      = json_value(data, "mirostat_eta",      default_sparams.mirostat_eta);
+        slot->sparams.penalize_nl       = json_value(data, "penalize_nl",       default_sparams.penalize_nl);
        slot->params.n_keep             = json_value(data, "n_keep",            slot->params.n_keep);
        slot->sparams.seed               = json_value(data, "seed",              default_sparams.seed);
        slot->sparams.grammar           = json_value(data, "grammar",           default_sparams.grammar);
@@ -719,8 +726,8 @@ struct llama_server_context
            slot->prompt = "";
        }

-        if (json_value(data, "ignore_eos", false) && has_eos_token) {
-                slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY});
+        if (json_value(data, "ignore_eos", false)) {
+                slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
        }
        /*
        slot->sparams.penalty_prompt_tokens.clear();
@@ -759,13 +766,13 @@ struct llama_server_context
            }
        }
      */
+
        slot->sparams.logit_bias.clear();

        const auto &logit_bias = data.find("logit_bias");
        if (logit_bias != data.end() && logit_bias->is_array())
        {
-            const llama_vocab * vocab = llama_model_get_vocab(model);
-            const int n_vocab = llama_vocab_n_tokens(vocab);
+            const int n_vocab = llama_n_vocab(model);
            for (const auto &el : *logit_bias)
            {
                if (el.is_array() && el.size() == 2)
@@ -794,7 +801,7 @@ struct llama_server_context
                    }
                    else if (el[0].is_string())
                    {
-                        auto toks = common_tokenize(vocab, el[0].get<std::string>(), false);
+                        auto toks = common_tokenize(model, el[0].get<std::string>(), false);
                        for (auto tok : toks)
                        {
                            slot->sparams.logit_bias.push_back({tok, bias});
@@ -1124,7 +1131,7 @@ struct llama_server_context
            slot.has_next_token = false;
        }

-        if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
+        if (result.tok == llama_token_eos(model))
        {
            slot.stopped_eos = true;
            slot.has_next_token = false;
@@ -1206,12 +1213,13 @@ struct llama_server_context
            {"mirostat",          slot.sparams.mirostat},
            {"mirostat_tau",      slot.sparams.mirostat_tau},
            {"mirostat_eta",      slot.sparams.mirostat_eta},
+            {"penalize_nl",       slot.sparams.penalize_nl},
            {"stop",              slot.params.antiprompt},
            {"n_predict",         slot.params.n_predict},
            {"n_keep",            params.n_keep},
            {"ignore_eos",        slot.sparams.ignore_eos},
            {"stream",            slot.params.stream},
-             //      {"logit_bias",        slot.sparams.logit_bias},
+      //      {"logit_bias",        slot.sparams.logit_bias},
            {"n_probs",           slot.sparams.n_probs},
            {"min_keep",          slot.sparams.min_keep},
            {"grammar",           slot.sparams.grammar},
@@ -1319,7 +1327,7 @@ struct llama_server_context
        res.error = false;
        res.stop = true;

-        const int n_embd = llama_model_n_embd(model);
+        const int n_embd = llama_n_embd(model);
        if (!params.embedding)
        {
            LOG_WARNING("embedding disabled", {
@@ -1418,7 +1426,7 @@ struct llama_server_context
                    n_eval = n_batch;
                }

-                const int n_embd = llama_model_n_embd(model);
+                const int n_embd = llama_n_embd(model);
                float * embd = img.image_embedding + i * n_embd;
                llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
                if (llama_decode(ctx, llava_batch.batch))
@@ -1699,11 +1707,11 @@ struct llama_server_context
                            suffix_tokens.erase(suffix_tokens.begin());
                        }

-                        prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab));
-                        prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS
-                        prefix_tokens.insert(prefix_tokens.end(),   llama_vocab_fim_suf(vocab));
+                        prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
+                        prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
+                        prefix_tokens.insert(prefix_tokens.end(),   llama_token_suffix(model));
                        prefix_tokens.insert(prefix_tokens.end(),   suffix_tokens.begin(), suffix_tokens.end());
-                        prefix_tokens.push_back(llama_vocab_fim_mid(vocab));
+                        prefix_tokens.push_back(llama_token_middle(model));
                        prompt_tokens = prefix_tokens;
                    }
                    else
@@ -2104,6 +2112,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
    //     slot->sparams.mirostat        = json_value(data, "mirostat",          default_sparams.mirostat);
    //     slot->sparams.mirostat_tau    = json_value(data, "mirostat_tau",      default_sparams.mirostat_tau);
    //     slot->sparams.mirostat_eta    = json_value(data, "mirostat_eta",      default_sparams.mirostat_eta);
+    //     slot->sparams.penalize_nl     = json_value(data, "penalize_nl",       default_sparams.penalize_nl);
    //     slot->params.n_keep           = json_value(data, "n_keep",            slot->params.n_keep);
    //     slot->params.seed             = json_value(data, "seed",              default_params.seed);
    //     slot->sparams.grammar         = json_value(data, "grammar",           default_sparams.grammar);
@@ -2126,6 +2135,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
    data["mirostat"] = predict->mirostat();
    data["mirostat_tau"] = predict->mirostattau();
    data["mirostat_eta"] = predict->mirostateta();
+    data["penalize_nl"] = predict->penalizenl();
    data["n_keep"] = predict->nkeep();
    data["seed"] = predict->seed();
    data["grammar"] = predict->grammar();
@@ -2171,6 +2181,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 //     llama.params.sparams.mirostat = predict->mirostat();
 //     llama.params.sparams.mirostat_tau = predict->mirostattau();
 //     llama.params.sparams.mirostat_eta = predict->mirostateta();
+//     llama.params.sparams.penalize_nl = predict->penalizenl();
 //     llama.params.n_keep = predict->nkeep();
 //     llama.params.seed = predict->seed();
 //     llama.params.sparams.grammar = predict->grammar();
@@ -2217,35 +2228,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 //     }
 // }

-const std::vector<ggml_type> kv_cache_types = {
-    GGML_TYPE_F32,
-    GGML_TYPE_F16,
-    GGML_TYPE_BF16,
-    GGML_TYPE_Q8_0,
-    GGML_TYPE_Q4_0,
-    GGML_TYPE_Q4_1,
-    GGML_TYPE_IQ4_NL,
-    GGML_TYPE_Q5_0,
-    GGML_TYPE_Q5_1,
-};
-
-static ggml_type kv_cache_type_from_str(const std::string & s) {
-    for (const auto & type : kv_cache_types) {
-        if (ggml_type_name(type) == s) {
-            return type;
-        }
-    }
-    throw std::runtime_error("Unsupported cache type: " + s);
-}
-
-static std::string get_all_kv_cache_types() {
-    std::ostringstream msg;
-    for (const auto & type : kv_cache_types) {
-        msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");
-    }
-    return msg.str();
-}
-
 static void params_parse(const backend::ModelOptions* request,
                                common_params & params) {
   
@@ -2259,12 +2241,6 @@ static void params_parse(const backend::ModelOptions* request,
    }
    //  params.model_alias ??
    params.model_alias =  request->modelfile();
-    if (!request->cachetypekey().empty()) {
-        params.cache_type_k = kv_cache_type_from_str(request->cachetypekey());
-    }
-    if (!request->cachetypevalue().empty()) {
-        params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue());
-    }
    params.n_ctx = request->contextsize();
    //params.memory_f16 = request->f16memory();
    params.cpuparams.n_threads = request->threads();
--- a/backend/cpp/llama/patches/01-llava.patch
+++ b/backend/cpp/llama/patches/01-llava.patch
@@ -1,13 +1,13 @@
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index 3cd0d2fa..6c5e811a 100644
+index 342042ff..224db9b5 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
-@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
-                 struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
-                 int* patches_data = (int*)malloc(ggml_nbytes(patches));
-                 for (int i = 0; i < num_patches; i++) {
--                    patches_data[i] = i + 1;
-+                    patches_data[i] = i;
-                 }
-                 ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
-                 free(patches_data);
+@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
+             struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
+             int* patches_data = (int*)malloc(ggml_nbytes(patches));
+             for (int i = 0; i < num_patches; i++) {
+-                patches_data[i] = i + 1;
++                patches_data[i] = i;
+             }
+             ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
+             free(patches_data);
--- a/backend/go/image/stablediffusion-ggml/Makefile
+++ b/backend/go/image/stablediffusion-ggml/Makefile
@@ -2,95 +2,20 @@ INCLUDE_PATH := $(abspath ./)
 LIBRARY_PATH := $(abspath ./)

 AR?=ar
-CMAKE_ARGS?=
+
 BUILD_TYPE?=
-ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
 # keep standard at C11 and C++11
 CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC

-# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
-CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
-
-# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
-ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DGGML_CUDA=ON
-# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# to CMAKE_ARGS automatically
-else ifeq ($(BUILD_TYPE),openblas)
-	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
-else ifeq ($(BUILD_TYPE),clblas)
-	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
-# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ 
-else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DGGML_HIP=ON
-# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
-# But if it's OSX without metal, disable it here
-else ifeq ($(OS),Darwin)
-	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
-	else
-		CMAKE_ARGS+=-DGGML_METAL=ON
-		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
-		TARGET+=--target ggml-metal
-	endif
-endif
-
-# ifeq ($(BUILD_TYPE),sycl_f16)
-# 	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
-# endif
-
-# ifeq ($(BUILD_TYPE),sycl_f32)
-# 	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
-# endif
-
 # warnings
 CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

-# Find all .a archives in ARCHIVE_DIR
-# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
-GGML_ARCHIVE_DIR := build/ggml/src/
-ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
-
-# Name of the single merged library
-COMBINED_LIB := libggmlall.a
-
-# Rule to merge all the .a files into one
-$(COMBINED_LIB): $(ALL_ARCHIVES)
-	@echo "Merging all .a into $(COMBINED_LIB)"
-	rm -f $@
-	mkdir -p merge-tmp
-	for a in $(ALL_ARCHIVES); do \
-		( cd merge-tmp && ar x ../$$a ); \
-	done
-	( cd merge-tmp && ar rcs ../$@ *.o )
-	# Ensure we have a proper index
-	ranlib $@
-	# Clean up
-	rm -rf merge-tmp
-
-build/libstable-diffusion.a:
-	@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	+bash -c "source $(ONEAPI_VARS); \
-	mkdir -p build && \
-	cd build && \
-	cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
-	cmake --build . --config Release"
-else
-	mkdir -p build && \
-	cd build && \
-	cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
-	cmake --build . --config Release
-endif
-	$(MAKE) $(COMBINED_LIB)
-
 gosd.o:
 	$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c

 libsd.a: gosd.o
-	cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
+	cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
 	$(AR) rcs libsd.a gosd.o

 clean:
-	rm -rf gosd.o libsd.a build $(COMBINED_LIB)
+	rm -f gosd.o libsd.a
--- a/backend/go/image/stablediffusion-ggml/gosd.go
+++ b/backend/go/image/stablediffusion-ggml/gosd.go
@@ -1,7 +1,7 @@
 package main

 // #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
-// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
+// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
 // #include <gosd.h>
 // #include <stdlib.h>
 import "C"
--- a/backend/go/vad/silero/vad.go
+++ b/backend/go/vad/silero/vad.go
@@ -21,8 +21,8 @@ func (vad *VAD) Load(opts *pb.ModelOptions) error {
 		SampleRate: 16000,
 		//WindowSize:           1024,
 		Threshold:            0.5,
-		MinSilenceDurationMs: 100,
-		SpeechPadMs:          30,
+		MinSilenceDurationMs: 0,
+		SpeechPadMs:          0,
 	})
 	if err != nil {
 		return fmt.Errorf("create silero detector: %w", err)
--- a/backend/python/autogptq/requirements-intel.txt
+++ b/backend/python/autogptq/requirements-intel.txt
@@ -1,6 +1,5 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
 transformers
--- a/backend/python/bark/requirements-intel.txt
+++ b/backend/python/bark/requirements-intel.txt
@@ -1,9 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-torchaudio==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
+torchaudio
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
--- a/backend/python/common/libbackend.sh
+++ b/backend/python/common/libbackend.sh
@@ -17,9 +17,6 @@
 # LIMIT_TARGETS="cublas12"
 # source $(dirname $0)/../common/libbackend.sh
 #
-
-PYTHON_VERSION="3.10"
-
 function init() {
    # Name of the backend (directory name)
    BACKEND_NAME=${PWD##*/}
@@ -91,7 +88,7 @@ function getBuildProfile() {
 # always result in an activated virtual environment
 function ensureVenv() {
    if [ ! -d "${EDIR}/venv" ]; then
-        uv venv --python ${PYTHON_VERSION} ${EDIR}/venv
+        uv venv ${EDIR}/venv
        echo "virtualenv created"
    fi

--- a/backend/python/common/template/requirements-intel.txt
+++ b/backend/python/common/template/requirements-intel.txt
@@ -1,5 +1,4 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
 optimum[openvino]
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 grpcio-tools
--- a/backend/python/coqui/requirements-intel.txt
+++ b/backend/python/coqui/requirements-intel.txt
@@ -1,10 +1,9 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-torchaudio==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
+torchaudio
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
 coqui-tts
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
 packaging==24.1
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -17,7 +17,7 @@ import backend_pb2_grpc

 import grpc

-from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
+from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
    EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
@@ -275,13 +275,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

                    if request.LowVRAM:
                        self.pipe.enable_model_cpu_offload()
-            elif request.PipelineType == "SanaPipeline":
-                self.pipe = SanaPipeline.from_pretrained(
-                    request.Model,
-                    variant="bf16",
-                    torch_dtype=torch.bfloat16)
-                self.pipe.vae.to(torch.bfloat16)
-                self.pipe.text_encoder.to(torch.bfloat16)

            if CLIPSKIP and request.CLIPSkip != 0:
                self.clip_skip = request.CLIPSkip
--- a/backend/python/diffusers/requirements-intel.txt
+++ b/backend/python/diffusers/requirements-intel.txt
@@ -1,10 +1,9 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-torchvision==0.18.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
+torchvision
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 diffusers
 opencv-python
 transformers
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.69.0
+grpcio==1.68.1
 pillow
 protobuf
 certifi
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
 wheel
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
--- a/backend/python/openvoice/requirements-cpu.txt
+++ b/backend/python/openvoice/requirements-cpu.txt
@@ -1,7 +1,3 @@
 torch==2.4.1
 git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
-whisper-timestamped
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
+git+https://github.com/myshell-ai/OpenVoice.git
--- a/backend/python/openvoice/requirements-cublas11.txt
+++ b/backend/python/openvoice/requirements-cublas11.txt
@@ -1,8 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 torch==2.4.1+cu118
 git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
-whisper-timestamped
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
+git+https://github.com/myshell-ai/OpenVoice.git
--- a/backend/python/openvoice/requirements-cublas12.txt
+++ b/backend/python/openvoice/requirements-cublas12.txt
@@ -1,7 +1,3 @@
 torch==2.4.1
 git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
-whisper-timestamped
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
+git+https://github.com/myshell-ai/OpenVoice.git
--- a/backend/python/openvoice/requirements-hipblas.txt
+++ b/backend/python/openvoice/requirements-hipblas.txt
@@ -1,8 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch==2.4.1+rocm6.0
 git+https://github.com/myshell-ai/MeloTTS.git
-git+https://github.com/myshell-ai/OpenVoice.git
-whisper-timestamped
-pydub==0.25.1
-wavmark==0.0.3
-eng_to_ipa==0.0.2
+git+https://github.com/myshell-ai/OpenVoice.git
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -1,15 +1,14 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-torchaudio==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
 optimum[openvino]
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 librosa==0.9.1
 faster-whisper==0.9.0
 pydub==0.25.1
 wavmark==0.0.3
+numpy==1.22.0
 eng_to_ipa==0.0.2
 inflect==7.0.0
 unidecode==1.3.7
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -1,17 +1,20 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 librosa
 faster-whisper
+pydub==0.25.1
+wavmark==0.0.3
+numpy==1.22.0
+eng_to_ipa==0.0.2
 inflect
 unidecode
+whisper-timestamped
 openai
 python-dotenv
 pypinyin
 cn2an==0.5.22
-numpy==1.22.0
 networkx==2.8.8
 jieba==0.42.1
-gradio==5.9.1
+gradio==3.48.0
 langid==1.1.6
 llvmlite==0.43.0
-setuptools
--- a/backend/python/parler-tts/requirements-intel.txt
+++ b/backend/python/parler-tts/requirements-intel.txt
@@ -1,8 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-torchaudio==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
+torchaudio
 optimum[openvino]
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -1,4 +1,3 @@
-grpcio==1.69.0
+grpcio==1.68.1
 certifi
 llvmlite==0.43.0
-setuptools
--- a/backend/python/rerankers/requirements-intel.txt
+++ b/backend/python/rerankers/requirements-intel.txt
@@ -1,9 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
 transformers
 accelerate
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+torch
 rerankers[transformers]
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
--- a/backend/python/sentencetransformers/requirements-intel.txt
+++ b/backend/python/sentencetransformers/requirements-intel.txt
@@ -1,9 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
 optimum[openvino]
-setuptools
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
 accelerate
 sentence-transformers==3.3.1
 transformers
--- a/backend/python/sentencetransformers/requirements.txt
+++ b/backend/python/sentencetransformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
 datasets
--- a/backend/python/transformers-musicgen/requirements-intel.txt
+++ b/backend/python/transformers-musicgen/requirements-intel.txt
@@ -1,8 +1,7 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
 transformers
-oneccl_bind_pt==2.3.100+xpu
 accelerate
-torch==2.3.1+cxx11.abi
+torch
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 scipy==1.14.0
 certifi
--- a/backend/python/transformers/requirements-intel.txt
+++ b/backend/python/transformers/requirements-intel.txt
@@ -1,7 +1,6 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
+intel-extension-for-pytorch
+torch
 optimum[openvino]
 intel-extension-for-transformers
 bitsandbytes
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
-setuptools
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
--- a/backend/python/vall-e-x/requirements-intel.txt
+++ b/backend/python/vall-e-x/requirements-intel.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
 accelerate
-torch==2.3.1+cxx11.abi
-torchaudio==2.3.1+cxx11.abi
+torch
+torchaudio
 optimum[openvino]
-oneccl_bind_pt==2.3.100+xpu
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
--- a/backend/python/vall-e-x/requirements.txt
+++ b/backend/python/vall-e-x/requirements.txt
@@ -1,4 +1,3 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
-certifi
-setuptools
+certifi
--- a/backend/python/vllm/requirements-intel.txt
+++ b/backend/python/vllm/requirements-intel.txt
@@ -1,9 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
 accelerate
-torch==2.3.1+cxx11.abi
+torch
 transformers
 optimum[openvino]
-setuptools
-bitsandbytes
-oneccl_bind_pt==2.3.100+xpu
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+bitsandbytes
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.69.0
+grpcio==1.68.1
 protobuf
 certifi
 setuptools
--- a/core/application.go
+++ b/core/application.go
@@ -0,0 +1,38 @@
+package core
+
+import (
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/pkg/model"
+)
+
+// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
+// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
+type Application struct {
+
+	// Application-Level Config
+	ApplicationConfig *config.ApplicationConfig
+	// ApplicationState *ApplicationState
+
+	// Core Low-Level Services
+	BackendConfigLoader *config.BackendConfigLoader
+	ModelLoader         *model.ModelLoader
+
+	// Backend Services
+	// EmbeddingsBackendService      *backend.EmbeddingsBackendService
+	// ImageGenerationBackendService *backend.ImageGenerationBackendService
+	// LLMBackendService             *backend.LLMBackendService
+	// TranscriptionBackendService *backend.TranscriptionBackendService
+	// TextToSpeechBackendService  *backend.TextToSpeechBackendService
+
+	// LocalAI System Services
+	BackendMonitorService *services.BackendMonitorService
+	GalleryService        *services.GalleryService
+	LocalAIMetricsService *services.LocalAIMetricsService
+	// OpenAIService         *services.OpenAIService
+}
+
+// TODO [NEXT PR?]: Break up ApplicationConfig.
+// Migrate over stuff that is not set via config at all - especially runtime stuff
+type ApplicationState struct {
+}
--- a/core/application/application.go
+++ b/core/application/application.go
@@ -1,39 +0,0 @@
-package application
-
-import (
-	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/LocalAI/pkg/templates"
-)
-
-type Application struct {
-	backendLoader      *config.BackendConfigLoader
-	modelLoader        *model.ModelLoader
-	applicationConfig  *config.ApplicationConfig
-	templatesEvaluator *templates.Evaluator
-}
-
-func newApplication(appConfig *config.ApplicationConfig) *Application {
-	return &Application{
-		backendLoader:      config.NewBackendConfigLoader(appConfig.ModelPath),
-		modelLoader:        model.NewModelLoader(appConfig.ModelPath),
-		applicationConfig:  appConfig,
-		templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
-	}
-}
-
-func (a *Application) BackendLoader() *config.BackendConfigLoader {
-	return a.backendLoader
-}
-
-func (a *Application) ModelLoader() *model.ModelLoader {
-	return a.modelLoader
-}
-
-func (a *Application) ApplicationConfig() *config.ApplicationConfig {
-	return a.applicationConfig
-}
-
-func (a *Application) TemplatesEvaluator() *templates.Evaluator {
-	return a.templatesEvaluator
-}
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -22,9 +22,8 @@ import (
 )

 type LLMResponse struct {
-	Response    string // should this be []byte?
-	Usage       TokenUsage
-	AudioOutput string
+	Response string // should this be []byte?
+	Usage    TokenUsage
 }

 type TokenUsage struct {
@@ -118,12 +117,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 			ss := ""

 			var partialRune []byte
-			err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) {
-				msg := reply.Message
-				partialRune = append(partialRune, msg...)
-
-				tokenUsage.Prompt = int(reply.PromptTokens)
-				tokenUsage.Completion = int(reply.Tokens)
+			err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
+				partialRune = append(partialRune, chars...)

 				for len(partialRune) > 0 {
 					r, size := utf8.DecodeRune(partialRune)
@@ -137,10 +132,6 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im

 					partialRune = partialRune[size:]
 				}
-
-				if len(msg) == 0 {
-					tokenCallback("", tokenUsage)
-				}
 			})
 			return LLMResponse{
 				Response: ss,
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -151,8 +151,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		TensorParallelSize:   int32(c.TensorParallelSize),
 		MMProj:               c.MMProj,
 		FlashAttention:       c.FlashAttention,
-		CacheTypeKey:         c.CacheTypeK,
-		CacheTypeValue:       c.CacheTypeV,
 		NoKVOffload:          c.NoKVOffloading,
 		YarnExtFactor:        c.YarnExtFactor,
 		YarnAttnFactor:       c.YarnAttnFactor,
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -6,12 +6,12 @@ import (
 	"strings"
 	"time"

-	"github.com/mudler/LocalAI/core/application"
 	cli_api "github.com/mudler/LocalAI/core/cli/api"
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http"
 	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/startup"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
@@ -186,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 	}

 	if r.PreloadBackendOnly {
-		_, err := application.New(opts...)
+		_, _, _, err := startup.Startup(opts...)
 		return err
 	}

-	app, err := application.New(opts...)
+	cl, ml, options, err := startup.Startup(opts...)
 	if err != nil {
 		return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
 	}

-	appHTTP, err := http.API(app)
+	appHTTP, err := http.App(cl, ml, options)
 	if err != nil {
 		log.Error().Err(err).Msg("error during HTTP App construction")
 		return err
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -38,7 +38,6 @@ type BackendConfig struct {
 	TemplateConfig      TemplateConfig         `yaml:"template"`
 	KnownUsecaseStrings []string               `yaml:"known_usecases"`
 	KnownUsecases       *BackendConfigUsecases `yaml:"-"`
-	Pipeline            Pipeline               `yaml:"pipeline"`

 	PromptStrings, InputStrings                []string               `yaml:"-"`
 	InputToken                                 [][]int                `yaml:"-"`
@@ -77,18 +76,6 @@ type BackendConfig struct {
 	Options []string `yaml:"options"`
 }

-// Pipeline defines other models to use for audio-to-audio
-type Pipeline struct {
-	TTS           string `yaml:"tts"`
-	LLM           string `yaml:"llm"`
-	Transcription string `yaml:"transcription"`
-	VAD           string `yaml:"vad"`
-}
-
-func (p Pipeline) IsNotConfigured() bool {
-	return p.LLM == "" || p.TTS == "" || p.Transcription == ""
-}
-
 type File struct {
 	Filename string         `yaml:"filename" json:"filename"`
 	SHA256   string         `yaml:"sha256" json:"sha256"`
@@ -168,10 +155,8 @@ type LLMConfig struct {
 	TensorParallelSize   int       `yaml:"tensor_parallel_size"`   // vLLM
 	MMProj               string    `yaml:"mmproj"`

-	FlashAttention bool   `yaml:"flash_attention"`
-	NoKVOffloading bool   `yaml:"no_kv_offloading"`
-	CacheTypeK     string `yaml:"cache_type_k"`
-	CacheTypeV     string `yaml:"cache_type_v"`
+	FlashAttention bool `yaml:"flash_attention"`
+	NoKVOffloading bool `yaml:"no_kv_offloading"`

 	RopeScaling string `yaml:"rope_scaling"`
 	ModelType   string `yaml:"type"`
@@ -219,8 +204,6 @@ type TemplateConfig struct {
 	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`

 	Multimodal string `yaml:"multimodal"`
-
-	JinjaTemplate bool `yaml:"jinja_template"`
 }

 func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
--- a/core/config/guesser.go
+++ b/core/config/guesser.go
@@ -26,14 +26,14 @@ const (
 type settingsConfig struct {
 	StopWords      []string
 	TemplateConfig TemplateConfig
-	RepeatPenalty  float64
+	RepeatPenalty float64
 }

 // default settings to adopt with a given model family
 var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
 	Gemma: {
 		RepeatPenalty: 1.0,
-		StopWords:     []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
+		StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
 		TemplateConfig: TemplateConfig{
 			Chat:        "{{.Input }}\n<start_of_turn>model\n",
 			ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
@@ -200,18 +200,6 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
 	} else {
 		log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
 	}
-
-	if cfg.HasTemplate() {
-		return
-	}
-
-	// identify from well known templates first, otherwise use the raw jinja template
-	chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
-	if found {
-		// try to use the jinja template
-		cfg.TemplateConfig.JinjaTemplate = true
-		cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
-	}
 }

 func identifyFamily(f *gguf.GGUFFile) familyType {
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -7,7 +7,6 @@ import (
 	"net/http"

 	"github.com/dave-gray101/v2keyauth"
-	"github.com/gofiber/websocket/v2"
 	"github.com/mudler/LocalAI/pkg/utils"

 	"github.com/mudler/LocalAI/core/http/endpoints/localai"
@@ -15,9 +14,10 @@ import (
 	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/http/routes"

-	"github.com/mudler/LocalAI/core/application"
+	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/pkg/model"

 	"github.com/gofiber/contrib/fiberzerolog"
 	"github.com/gofiber/fiber/v2"
@@ -49,18 +49,18 @@ var embedDirStatic embed.FS
 // @in header
 // @name Authorization

-func API(application *application.Application) (*fiber.App, error) {
+func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {

 	fiberCfg := fiber.Config{
 		Views:     renderEngine(),
-		BodyLimit: application.ApplicationConfig().UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+		BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
 		// We disable the Fiber startup message as it does not conform to structured logging.
 		// We register a startup log line with connection information in the OnListen hook to keep things user friendly though
 		DisableStartupMessage: true,
 		// Override default error handler
 	}

-	if !application.ApplicationConfig().OpaqueErrors {
+	if !appConfig.OpaqueErrors {
 		// Normally, return errors as JSON responses
 		fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error {
 			// Status code defaults to 500
@@ -86,20 +86,9 @@ func API(application *application.Application) (*fiber.App, error) {
 		}
 	}

-	router := fiber.New(fiberCfg)
+	app := fiber.New(fiberCfg)

-	router.Use(middleware.StripPathPrefix())
-
-	router.Use("/v1/realtime", func(c *fiber.Ctx) error {
-		if websocket.IsWebSocketUpgrade(c) {
-			// Returns true if the client requested upgrade to the WebSocket protocol
-			return c.Next()
-		}
-
-		return nil
-	})
-
-	router.Hooks().OnListen(func(listenData fiber.ListenData) error {
+	app.Hooks().OnListen(func(listenData fiber.ListenData) error {
 		scheme := "http"
 		if listenData.TLS {
 			scheme = "https"
@@ -110,82 +99,82 @@ func API(application *application.Application) (*fiber.App, error) {

 	// Have Fiber use zerolog like the rest of the application rather than it's built-in logger
 	logger := log.Logger
-	router.Use(fiberzerolog.New(fiberzerolog.Config{
+	app.Use(fiberzerolog.New(fiberzerolog.Config{
 		Logger: &logger,
 	}))

 	// Default middleware config

-	if !application.ApplicationConfig().Debug {
-		router.Use(recover.New())
+	if !appConfig.Debug {
+		app.Use(recover.New())
 	}

-	if !application.ApplicationConfig().DisableMetrics {
+	if !appConfig.DisableMetrics {
 		metricsService, err := services.NewLocalAIMetricsService()
 		if err != nil {
 			return nil, err
 		}

 		if metricsService != nil {
-			router.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
-			router.Hooks().OnShutdown(func() error {
+			app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
+			app.Hooks().OnShutdown(func() error {
 				return metricsService.Shutdown()
 			})
 		}

 	}
 	// Health Checks should always be exempt from auth, so register these first
-	routes.HealthRoutes(router)
+	routes.HealthRoutes(app)

-	kaConfig, err := middleware.GetKeyAuthConfig(application.ApplicationConfig())
+	kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
 	if err != nil || kaConfig == nil {
 		return nil, fmt.Errorf("failed to create key auth config: %w", err)
 	}

 	// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
-	router.Use(v2keyauth.New(*kaConfig))
+	app.Use(v2keyauth.New(*kaConfig))

-	if application.ApplicationConfig().CORS {
+	if appConfig.CORS {
 		var c func(ctx *fiber.Ctx) error
-		if application.ApplicationConfig().CORSAllowOrigins == "" {
+		if appConfig.CORSAllowOrigins == "" {
 			c = cors.New()
 		} else {
-			c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig().CORSAllowOrigins})
+			c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
 		}

-		router.Use(c)
+		app.Use(c)
 	}

-	if application.ApplicationConfig().CSRF {
+	if appConfig.CSRF {
 		log.Debug().Msg("Enabling CSRF middleware. Tokens are now required for state-modifying requests")
-		router.Use(csrf.New())
+		app.Use(csrf.New())
 	}

 	// Load config jsons
-	utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
-	utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
-	utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
+	utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
+	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
+	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)

-	galleryService := services.NewGalleryService(application.ApplicationConfig())
-	galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
+	galleryService := services.NewGalleryService(appConfig)
+	galleryService.Start(appConfig.Context, cl)

-	routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
-	routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
-	routes.RegisterOpenAIRoutes(router, application)
-	if !application.ApplicationConfig().DisableWebUI {
-		routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
+	routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
+	routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
+	routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
+	if !appConfig.DisableWebUI {
+		routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
 	}
-	routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
+	routes.RegisterJINARoutes(app, cl, ml, appConfig)

 	httpFS := http.FS(embedDirStatic)

-	router.Use(favicon.New(favicon.Config{
+	app.Use(favicon.New(favicon.Config{
 		URL:        "/favicon.ico",
 		FileSystem: httpFS,
 		File:       "static/favicon.ico",
 	}))

-	router.Use("/static", filesystem.New(filesystem.Config{
+	app.Use("/static", filesystem.New(filesystem.Config{
 		Root:       httpFS,
 		PathPrefix: "static",
 		Browse:     true,
@@ -193,7 +182,7 @@ func API(application *application.Application) (*fiber.App, error) {

 	// Define a custom 404 handler
 	// Note: keep this at the bottom!
-	router.Use(notFoundHandler)
+	app.Use(notFoundHandler)

-	return router, nil
+	return app, nil
 }
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -12,14 +12,15 @@ import (
 	"path/filepath"
 	"runtime"

-	"github.com/mudler/LocalAI/core/application"
 	"github.com/mudler/LocalAI/core/config"
 	. "github.com/mudler/LocalAI/core/http"
 	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/startup"

 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/model"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v3"
@@ -237,31 +238,6 @@ func postInvalidRequest(url string) (error, int) {
 	return nil, resp.StatusCode
 }

-func getRequest(url string, header http.Header) (error, int, []byte) {
-
-	req, err := http.NewRequest("GET", url, nil)
-	if err != nil {
-		return err, -1, nil
-	}
-
-	req.Header = header
-
-	client := &http.Client{}
-	resp, err := client.Do(req)
-	if err != nil {
-		return err, -1, nil
-	}
-
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return err, -1, nil
-	}
-
-	return nil, resp.StatusCode, body
-}
-
 const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

 //go:embed backend-assets/*
@@ -276,6 +252,9 @@ var _ = Describe("API test", func() {
 	var cancel context.CancelFunc
 	var tmpdir string
 	var modelDir string
+	var bcl *config.BackendConfigLoader
+	var ml *model.ModelLoader
+	var applicationConfig *config.ApplicationConfig

 	commonOpts := []config.AppOption{
 		config.WithDebug(true),
@@ -321,7 +300,7 @@ var _ = Describe("API test", func() {
 				},
 			}

-			application, err := application.New(
+			bcl, ml, applicationConfig, err = startup.Startup(
 				append(commonOpts,
 					config.WithContext(c),
 					config.WithGalleries(galleries),
@@ -331,7 +310,7 @@ var _ = Describe("API test", func() {
 					config.WithBackendAssetsOutput(backendAssetsDir))...)
 			Expect(err).ToNot(HaveOccurred())

-			app, err = API(application)
+			app, err = App(bcl, ml, applicationConfig)
 			Expect(err).ToNot(HaveOccurred())

 			go app.Listen("127.0.0.1:9090")
@@ -370,33 +349,6 @@ var _ = Describe("API test", func() {
 			})
 		})

-		Context("URL routing Tests", func() {
-			It("Should support reverse-proxy when unauthenticated", func() {
-
-				err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{
-					"X-Forwarded-Proto":  {"https"},
-					"X-Forwarded-Host":   {"example.org"},
-					"X-Forwarded-Prefix": {"/myprefix/"},
-				})
-				Expect(err).To(BeNil(), "error")
-				Expect(sc).To(Equal(401), "status code")
-				Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
-			})
-
-			It("Should support reverse-proxy when authenticated", func() {
-
-				err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{
-					"Authorization":      {bearerKey},
-					"X-Forwarded-Proto":  {"https"},
-					"X-Forwarded-Host":   {"example.org"},
-					"X-Forwarded-Prefix": {"/myprefix/"},
-				})
-				Expect(err).To(BeNil(), "error")
-				Expect(sc).To(Equal(200), "status code")
-				Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
-			})
-		})
-
 		Context("Applying models", func() {

 			It("applies models from a gallery", func() {
@@ -587,7 +539,7 @@ var _ = Describe("API test", func() {
 				var res map[string]string
 				err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
 				Expect(err).ToNot(HaveOccurred())
-				Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
+				Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
 				Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
 				Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))

@@ -689,7 +641,7 @@ var _ = Describe("API test", func() {
 				},
 			}

-			application, err := application.New(
+			bcl, ml, applicationConfig, err = startup.Startup(
 				append(commonOpts,
 					config.WithContext(c),
 					config.WithAudioDir(tmpdir),
@@ -700,7 +652,7 @@ var _ = Describe("API test", func() {
 					config.WithBackendAssetsOutput(tmpdir))...,
 			)
 			Expect(err).ToNot(HaveOccurred())
-			app, err = API(application)
+			app, err = App(bcl, ml, applicationConfig)
 			Expect(err).ToNot(HaveOccurred())

 			go app.Listen("127.0.0.1:9090")
@@ -756,7 +708,7 @@ var _ = Describe("API test", func() {
 			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))

 			Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
-			Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave")))
+			Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
 		})
 		It("installs and is capable to generate images", Label("stablediffusion"), func() {
 			if runtime.GOOS != "linux" {
@@ -820,14 +772,14 @@ var _ = Describe("API test", func() {

 			var err error

-			application, err := application.New(
+			bcl, ml, applicationConfig, err = startup.Startup(
 				append(commonOpts,
 					config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
 					config.WithContext(c),
 					config.WithModelPath(modelPath),
 				)...)
 			Expect(err).ToNot(HaveOccurred())
-			app, err = API(application)
+			app, err = App(bcl, ml, applicationConfig)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")

@@ -1038,14 +990,14 @@ var _ = Describe("API test", func() {
 			c, cancel = context.WithCancel(context.Background())

 			var err error
-			application, err := application.New(
+			bcl, ml, applicationConfig, err = startup.Startup(
 				append(commonOpts,
 					config.WithContext(c),
 					config.WithModelPath(modelPath),
 					config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
 			)
 			Expect(err).ToNot(HaveOccurred())
-			app, err = API(application)
+			app, err = App(bcl, ml, applicationConfig)
 			Expect(err).ToNot(HaveOccurred())

 			go app.Listen("127.0.0.1:9090")
--- a/core/http/ctx/fiber.go
+++ b/core/http/ctx/fiber.go
@@ -19,11 +19,9 @@ func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *mo
 	if ctx.Params("model") != "" {
 		modelInput = ctx.Params("model")
 	}
-
 	if ctx.Query("model") != "" {
 		modelInput = ctx.Query("model")
 	}
-
 	// Set model from bearer token, if available
 	bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
 	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
--- a/core/http/elements/buttons.go
+++ b/core/http/elements/buttons.go
@@ -16,7 +16,7 @@ func installButton(galleryName string) elem.Node {
 			"class":                 "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
 			"hx-swap":               "outerHTML",
 			// post the Model ID as param
-			"hx-post": "browse/install/model/" + galleryName,
+			"hx-post": "/browse/install/model/" + galleryName,
 		},
 		elem.I(
 			attrs.Props{
@@ -36,7 +36,7 @@ func reInstallButton(galleryName string) elem.Node {
 			"hx-target":             "#action-div-" + dropBadChars(galleryName),
 			"hx-swap":               "outerHTML",
 			// post the Model ID as param
-			"hx-post": "browse/install/model/" + galleryName,
+			"hx-post": "/browse/install/model/" + galleryName,
 		},
 		elem.I(
 			attrs.Props{
@@ -80,7 +80,7 @@ func deleteButton(galleryID string) elem.Node {
 			"hx-target":             "#action-div-" + dropBadChars(galleryID),
 			"hx-swap":               "outerHTML",
 			// post the Model ID as param
-			"hx-post": "browse/delete/model/" + galleryID,
+			"hx-post": "/browse/delete/model/" + galleryID,
 		},
 		elem.I(
 			attrs.Props{
--- a/core/http/elements/gallery.go
+++ b/core/http/elements/gallery.go
@@ -47,7 +47,7 @@ func searchableElement(text, icon string) elem.Node {
 					//	"value":     text,
 					//"class":     "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
 					"href":      "#!",
-					"hx-post":   "browse/search/models",
+					"hx-post":   "/browse/search/models",
 					"hx-target": "#search-results",
 					// TODO: this doesn't work
 					//	"hx-vals":      `{ \"search\": \"` + text + `\" }`,
--- a/core/http/elements/progressbar.go
+++ b/core/http/elements/progressbar.go
@@ -64,7 +64,7 @@ func StartProgressBar(uid, progress, text string) string {
 	return elem.Div(
 		attrs.Props{
 			"hx-trigger": "done",
-			"hx-get":     "browse/job/" + uid,
+			"hx-get":     "/browse/job/" + uid,
 			"hx-swap":    "outerHTML",
 			"hx-target":  "this",
 		},
@@ -77,7 +77,7 @@ func StartProgressBar(uid, progress, text string) string {
 			},
 			elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive
 			elem.Div(attrs.Props{
-				"hx-get":     "browse/job/progress/" + uid,
+				"hx-get":     "/browse/job/progress/" + uid,
 				"hx-trigger": "every 600ms",
 				"hx-target":  "this",
 				"hx-swap":    "innerHTML",
--- a/core/http/endpoints/explorer/dashboard.go
+++ b/core/http/endpoints/explorer/dashboard.go
@@ -6,7 +6,6 @@ import (

 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/explorer"
-	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/internal"
 )

@@ -15,7 +14,6 @@ func Dashboard() func(*fiber.Ctx) error {
 		summary := fiber.Map{
 			"Title":   "LocalAI API - " + internal.PrintableVersion(),
 			"Version": internal.PrintableVersion(),
-			"BaseURL": utils.BaseURL(c),
 		}

 		if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -9,7 +9,6 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/rs/zerolog/log"
@@ -83,8 +82,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
 			Galleries:        mgs.galleries,
 			ConfigURL:        input.ConfigURL,
 		}
-
-		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
 	}
 }

@@ -107,7 +105,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
 			return err
 		}

-		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
 	}
 }

--- a/core/http/endpoints/localai/welcome.go
+++ b/core/http/endpoints/localai/welcome.go
@@ -4,7 +4,6 @@ import (
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
@@ -33,7 +32,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
 		summary := fiber.Map{
 			"Title":             "LocalAI API - " + internal.PrintableVersion(),
 			"Version":           internal.PrintableVersion(),
-			"BaseURL":           utils.BaseURL(c),
 			"Models":            modelsWithoutConfig,
 			"ModelsConfig":      backendConfigs,
 			"GalleryConfig":     galleryConfigs,
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -14,8 +14,6 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
-	"github.com/mudler/LocalAI/pkg/templates"
-
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
@@ -26,7 +24,7 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/chat/completions [post]
-func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	var id, textContentToReturn string
 	var created int

@@ -296,10 +294,148 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
 		// If we are using the tokenizer template, we don't need to process the messages
 		// unless we are processing functions
 		if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
-			predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn)
+			suppressConfigSystemPrompt := false
+			mess := []string{}
+			for messageIndex, i := range input.Messages {
+				var content string
+				role := i.Role
+
+				// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
+				// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
+				if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
+					roleFn := "assistant_function_call"
+					r := config.Roles[roleFn]
+					if r != "" {
+						role = roleFn
+					}
+				}
+				r := config.Roles[role]
+				contentExists := i.Content != nil && i.StringContent != ""
+
+				fcall := i.FunctionCall
+				if len(i.ToolCalls) > 0 {
+					fcall = i.ToolCalls
+				}
+
+				// First attempt to populate content via a chat message specific template
+				if config.TemplateConfig.ChatMessage != "" {
+					chatMessageData := model.ChatMessageTemplateData{
+						SystemPrompt: config.SystemPrompt,
+						Role:         r,
+						RoleName:     role,
+						Content:      i.StringContent,
+						FunctionCall: fcall,
+						FunctionName: i.Name,
+						LastMessage:  messageIndex == (len(input.Messages) - 1),
+						Function:     config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
+						MessageIndex: messageIndex,
+					}
+					templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
+					if err != nil {
+						log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
+					} else {
+						if templatedChatMessage == "" {
+							log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
+							continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
+						}
+						log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
+						content = templatedChatMessage
+					}
+				}
+
+				marshalAnyRole := func(f any) {
+					j, err := json.Marshal(f)
+					if err == nil {
+						if contentExists {
+							content += "\n" + fmt.Sprint(r, " ", string(j))
+						} else {
+							content = fmt.Sprint(r, " ", string(j))
+						}
+					}
+				}
+				marshalAny := func(f any) {
+					j, err := json.Marshal(f)
+					if err == nil {
+						if contentExists {
+							content += "\n" + string(j)
+						} else {
+							content = string(j)
+						}
+					}
+				}
+				// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
+				if content == "" {
+					if r != "" {
+						if contentExists {
+							content = fmt.Sprint(r, i.StringContent)
+						}
+
+						if i.FunctionCall != nil {
+							marshalAnyRole(i.FunctionCall)
+						}
+						if i.ToolCalls != nil {
+							marshalAnyRole(i.ToolCalls)
+						}
+					} else {
+						if contentExists {
+							content = fmt.Sprint(i.StringContent)
+						}
+						if i.FunctionCall != nil {
+							marshalAny(i.FunctionCall)
+						}
+						if i.ToolCalls != nil {
+							marshalAny(i.ToolCalls)
+						}
+					}
+					// Special Handling: System. We care if it was printed at all, not the r branch, so check separately
+					if contentExists && role == "system" {
+						suppressConfigSystemPrompt = true
+					}
+				}
+
+				mess = append(mess, content)
+			}
+
+			joinCharacter := "\n"
+			if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
+				joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
+			}
+
+			predInput = strings.Join(mess, joinCharacter)
+			log.Debug().Msgf("Prompt (before templating): %s", predInput)
+
+			templateFile := ""
+
+			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+			if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
+				templateFile = config.Model
+			}
+
+			if config.TemplateConfig.Chat != "" && !shouldUseFn {
+				templateFile = config.TemplateConfig.Chat
+			}
+
+			if config.TemplateConfig.Functions != "" && shouldUseFn {
+				templateFile = config.TemplateConfig.Functions
+			}
+
+			if templateFile != "" {
+				templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
+					SystemPrompt:         config.SystemPrompt,
+					SuppressSystemPrompt: suppressConfigSystemPrompt,
+					Input:                predInput,
+					Functions:            funcs,
+				})
+				if err == nil {
+					predInput = templatedInput
+					log.Debug().Msgf("Template found, input modified to: %s", predInput)
+				} else {
+					log.Debug().Msgf("Template failed loading: %s", err.Error())
+				}
+			}

 			log.Debug().Msgf("Prompt (after templating): %s", predInput)
-			if config.Grammar != "" {
+			if shouldUseFn && config.Grammar != "" {
 				log.Debug().Msgf("Grammar: %+v", config.Grammar)
 			}
 		}
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -16,7 +16,6 @@ import (
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
 	model "github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/LocalAI/pkg/templates"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
@@ -26,7 +25,7 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/completions [post]
-func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	id := uuid.New().String()
 	created := int(time.Now().Unix())

@@ -95,6 +94,17 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
 			c.Set("Transfer-Encoding", "chunked")
 		}

+		templateFile := ""
+
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+		if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
+			templateFile = config.Model
+		}
+
+		if config.TemplateConfig.Completion != "" {
+			templateFile = config.TemplateConfig.Completion
+		}
+
 		if input.Stream {
 			if len(config.PromptStrings) > 1 {
 				return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
@@ -102,13 +112,15 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e

 			predInput := config.PromptStrings[0]

-			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
-				Input:        predInput,
-				SystemPrompt: config.SystemPrompt,
-			})
-			if err == nil {
-				predInput = templatedInput
-				log.Debug().Msgf("Template found, input modified to: %s", predInput)
+			if templateFile != "" {
+				templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
+					Input:        predInput,
+					SystemPrompt: config.SystemPrompt,
+				})
+				if err == nil {
+					predInput = templatedInput
+					log.Debug().Msgf("Template found, input modified to: %s", predInput)
+				}
 			}

 			responses := make(chan schema.OpenAIResponse)
@@ -153,13 +165,16 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
 		totalTokenUsage := backend.TokenUsage{}

 		for k, i := range config.PromptStrings {
-			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
-				SystemPrompt: config.SystemPrompt,
-				Input:        i,
-			})
-			if err == nil {
-				i = templatedInput
-				log.Debug().Msgf("Template found, input modified to: %s", i)
+			if templateFile != "" {
+				// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+				templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
+					SystemPrompt: config.SystemPrompt,
+					Input:        i,
+				})
+				if err == nil {
+					i = templatedInput
+					log.Debug().Msgf("Template found, input modified to: %s", i)
+				}
 			}

 			r, tokenUsage, err := ComputeChoices(
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@@ -12,7 +12,6 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/schema"
 	model "github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/LocalAI/pkg/templates"

 	"github.com/rs/zerolog/log"
 )
@@ -22,8 +21,7 @@ import (
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/edits [post]
-func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-
+func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 		if err != nil {
@@ -37,18 +35,31 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat

 		log.Debug().Msgf("Parameter Config: %+v", config)

+		templateFile := ""
+
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+		if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
+			templateFile = config.Model
+		}
+
+		if config.TemplateConfig.Edit != "" {
+			templateFile = config.TemplateConfig.Edit
+		}
+
 		var result []schema.Choice
 		totalTokenUsage := backend.TokenUsage{}

 		for _, i := range config.InputStrings {
-			templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{
-				Input:        i,
-				Instruction:  input.Instruction,
-				SystemPrompt: config.SystemPrompt,
-			})
-			if err == nil {
-				i = templatedInput
-				log.Debug().Msgf("Template found, input modified to: %s", i)
+			if templateFile != "" {
+				templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
+					Input:        i,
+					Instruction:  input.Instruction,
+					SystemPrompt: config.SystemPrompt,
+				})
+				if err == nil {
+					i = templatedInput
+					log.Debug().Msgf("Template found, input modified to: %s", i)
+				}
 			}

 			r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
--- a/core/http/endpoints/openai/realtime.go
+++ b/core/http/endpoints/openai/realtime.go
--- a/core/http/endpoints/openai/realtime_model.go
+++ b/core/http/endpoints/openai/realtime_model.go
@@ -1,186 +0,0 @@
-package openai
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/mudler/LocalAI/core/backend"
-	"github.com/mudler/LocalAI/core/config"
-	grpcClient "github.com/mudler/LocalAI/pkg/grpc"
-	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	model "github.com/mudler/LocalAI/pkg/model"
-	"github.com/rs/zerolog/log"
-	"google.golang.org/grpc"
-)
-
-var (
-	_ Model = new(wrappedModel)
-	_ Model = new(anyToAnyModel)
-)
-
-// wrappedModel represent a model which does not support Any-to-Any operations
-// This means that we will fake an Any-to-Any model by overriding some of the gRPC client methods
-// which are for Any-To-Any models, but instead we will call a pipeline (for e.g STT->LLM->TTS)
-type wrappedModel struct {
-	TTSConfig           *config.BackendConfig
-	TranscriptionConfig *config.BackendConfig
-	LLMConfig           *config.BackendConfig
-	TTSClient           grpcClient.Backend
-	TranscriptionClient grpcClient.Backend
-	LLMClient           grpcClient.Backend
-
-	VADConfig *config.BackendConfig
-	VADClient grpcClient.Backend
-}
-
-// anyToAnyModel represent a model which supports Any-to-Any operations
-// We have to wrap this out as well because we want to load two models one for VAD and one for the actual model.
-// In the future there could be models that accept continous audio input only so this design will be useful for that
-type anyToAnyModel struct {
-	LLMConfig *config.BackendConfig
-	LLMClient grpcClient.Backend
-
-	VADConfig *config.BackendConfig
-	VADClient grpcClient.Backend
-}
-
-func (m *wrappedModel) VAD(ctx context.Context, in *proto.VADRequest, opts ...grpc.CallOption) (*proto.VADResponse, error) {
-	return m.VADClient.VAD(ctx, in)
-}
-
-func (m *anyToAnyModel) VAD(ctx context.Context, in *proto.VADRequest, opts ...grpc.CallOption) (*proto.VADResponse, error) {
-	return m.VADClient.VAD(ctx, in)
-}
-
-func (m *wrappedModel) Predict(ctx context.Context, in *proto.PredictOptions, opts ...grpc.CallOption) (*proto.Reply, error) {
-	// TODO: Convert with pipeline (audio to text, text to llm, result to tts, and return it)
-	// sound.BufferAsWAV(audioData, "audio.wav")
-
-	return m.LLMClient.Predict(ctx, in)
-}
-
-func (m *wrappedModel) PredictStream(ctx context.Context, in *proto.PredictOptions, f func(reply *proto.Reply), opts ...grpc.CallOption) error {
-	// TODO: Convert with pipeline (audio to text, text to llm, result to tts, and return it)
-
-	return m.LLMClient.PredictStream(ctx, in, f)
-}
-
-func (m *anyToAnyModel) Predict(ctx context.Context, in *proto.PredictOptions, opts ...grpc.CallOption) (*proto.Reply, error) {
-	return m.LLMClient.Predict(ctx, in)
-}
-
-func (m *anyToAnyModel) PredictStream(ctx context.Context, in *proto.PredictOptions, f func(reply *proto.Reply), opts ...grpc.CallOption) error {
-	return m.LLMClient.PredictStream(ctx, in, f)
-}
-
-// returns and loads either a wrapped model or a model that support audio-to-audio
-func newModel(cfg *config.BackendConfig, cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, modelName string) (Model, error) {
-
-	// Prepare VAD model
-	cfgVAD, err := cl.LoadBackendConfigFileByName(cfg.Pipeline.VAD, ml.ModelPath)
-	if err != nil {
-
-		return nil, fmt.Errorf("failed to load backend config: %w", err)
-	}
-
-	if !cfgVAD.Validate() {
-		return nil, fmt.Errorf("failed to validate config: %w", err)
-	}
-
-	opts := backend.ModelOptions(*cfgVAD, appConfig)
-	VADClient, err := ml.Load(opts...)
-	if err != nil {
-		return nil, fmt.Errorf("failed to load tts model: %w", err)
-	}
-
-	// If we don't have Wrapped model definitions, just return a standard model
-	if cfg.Pipeline.IsNotConfigured() {
-
-		// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
-		cfgAnyToAny, err := cl.LoadBackendConfigFileByName(cfg.Model, ml.ModelPath)
-		if err != nil {
-
-			return nil, fmt.Errorf("failed to load backend config: %w", err)
-		}
-
-		if !cfgAnyToAny.Validate() {
-			return nil, fmt.Errorf("failed to validate config: %w", err)
-		}
-
-		opts := backend.ModelOptions(*cfgAnyToAny, appConfig)
-		anyToAnyClient, err := ml.Load(opts...)
-		if err != nil {
-			return nil, fmt.Errorf("failed to load tts model: %w", err)
-		}
-
-		return &anyToAnyModel{
-			LLMConfig: cfgAnyToAny,
-			LLMClient: anyToAnyClient,
-			VADConfig: cfgVAD,
-			VADClient: VADClient,
-		}, nil
-	}
-
-	log.Debug().Msg("Loading a wrapped model")
-
-	// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
-	cfgLLM, err := cl.LoadBackendConfigFileByName(cfg.Pipeline.LLM, ml.ModelPath)
-	if err != nil {
-
-		return nil, fmt.Errorf("failed to load backend config: %w", err)
-	}
-
-	if !cfgLLM.Validate() {
-		return nil, fmt.Errorf("failed to validate config: %w", err)
-	}
-
-	cfgTTS, err := cl.LoadBackendConfigFileByName(cfg.Pipeline.TTS, ml.ModelPath)
-	if err != nil {
-
-		return nil, fmt.Errorf("failed to load backend config: %w", err)
-	}
-
-	if !cfgTTS.Validate() {
-		return nil, fmt.Errorf("failed to validate config: %w", err)
-	}
-
-	cfgSST, err := cl.LoadBackendConfigFileByName(cfg.Pipeline.Transcription, ml.ModelPath)
-	if err != nil {
-
-		return nil, fmt.Errorf("failed to load backend config: %w", err)
-	}
-
-	if !cfgSST.Validate() {
-		return nil, fmt.Errorf("failed to validate config: %w", err)
-	}
-
-	opts = backend.ModelOptions(*cfgTTS, appConfig)
-	ttsClient, err := ml.Load(opts...)
-	if err != nil {
-		return nil, fmt.Errorf("failed to load tts model: %w", err)
-	}
-
-	opts = backend.ModelOptions(*cfgSST, appConfig)
-	transcriptionClient, err := ml.Load(opts...)
-	if err != nil {
-		return nil, fmt.Errorf("failed to load SST model: %w", err)
-	}
-
-	opts = backend.ModelOptions(*cfgLLM, appConfig)
-	llmClient, err := ml.Load(opts...)
-	if err != nil {
-		return nil, fmt.Errorf("failed to load LLM model: %w", err)
-	}
-
-	return &wrappedModel{
-		TTSConfig:           cfgTTS,
-		TranscriptionConfig: cfgSST,
-		LLMConfig:           cfgLLM,
-		TTSClient:           ttsClient,
-		TranscriptionClient: transcriptionClient,
-		LLMClient:           llmClient,
-
-		VADConfig: cfgVAD,
-		VADClient: VADClient,
-	}, nil
-}
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -48,25 +48,6 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo
 	return modelFile, input, err
 }

-// func readWSRequest(c *websocket.Conn, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
-// 	input := new(schema.OpenAIRequest)
-
-// 	input.Model = c.Query("name")
-
-// 	received, _ := json.Marshal(input)
-
-// 	ctx, cancel := context.WithCancel(o.Context)
-
-// 	input.Context = ctx
-// 	input.Cancel = cancel
-
-// 	log.Debug().Msgf("Request received: %s", string(received))
-
-// 	modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)
-
-// 	return modelFile, input, err
-// }
-
 func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
--- a/core/http/explorer.go
+++ b/core/http/explorer.go
@@ -7,7 +7,6 @@ import (
 	"github.com/gofiber/fiber/v2/middleware/favicon"
 	"github.com/gofiber/fiber/v2/middleware/filesystem"
 	"github.com/mudler/LocalAI/core/explorer"
-	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/http/routes"
 )

@@ -23,7 +22,6 @@ func Explorer(db *explorer.Database) *fiber.App {

 	app := fiber.New(fiberCfg)

-	app.Use(middleware.StripPathPrefix())
 	routes.RegisterExplorerRoutes(app, db)

 	httpFS := http.FS(embedDirStatic)
--- a/core/http/middleware/auth.go
+++ b/core/http/middleware/auth.go
@@ -8,7 +8,6 @@ import (
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/keyauth"
 	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/http/utils"
 )

 // This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
@@ -40,9 +39,7 @@ func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.Er
 			if applicationConfig.OpaqueErrors {
 				return ctx.SendStatus(401)
 			}
-			return ctx.Status(401).Render("views/login", fiber.Map{
-				"BaseURL": utils.BaseURL(ctx),
-			})
+			return ctx.Status(401).Render("views/login", nil)
 		}
 		if applicationConfig.OpaqueErrors {
 			return ctx.SendStatus(500)
--- a/core/http/middleware/strippathprefix.go
+++ b/core/http/middleware/strippathprefix.go
@@ -1,36 +0,0 @@
-package middleware
-
-import (
-	"strings"
-
-	"github.com/gofiber/fiber/v2"
-)
-
-// StripPathPrefix returns a middleware that strips a path prefix from the request path.
-// The path prefix is obtained from the X-Forwarded-Prefix HTTP request header.
-func StripPathPrefix() fiber.Handler {
-	return func(c *fiber.Ctx) error {
-		for _, prefix := range c.GetReqHeaders()["X-Forwarded-Prefix"] {
-			if prefix != "" {
-				path := c.Path()
-				pos := len(prefix)
-
-				if prefix[pos-1] == '/' {
-					pos--
-				} else {
-					prefix += "/"
-				}
-
-				if strings.HasPrefix(path, prefix) {
-					c.Path(path[pos:])
-					break
-				} else if prefix[:pos] == path {
-					c.Redirect(prefix)
-					return nil
-				}
-			}
-		}
-
-		return c.Next()
-	}
-}
--- a/core/http/middleware/strippathprefix_test.go
+++ b/core/http/middleware/strippathprefix_test.go
@@ -1,121 +0,0 @@
-package middleware
-
-import (
-	"net/http/httptest"
-	"testing"
-
-	"github.com/gofiber/fiber/v2"
-	"github.com/stretchr/testify/require"
-)
-
-func TestStripPathPrefix(t *testing.T) {
-	var actualPath string
-
-	app := fiber.New()
-
-	app.Use(StripPathPrefix())
-
-	app.Get("/hello/world", func(c *fiber.Ctx) error {
-		actualPath = c.Path()
-		return nil
-	})
-
-	app.Get("/", func(c *fiber.Ctx) error {
-		actualPath = c.Path()
-		return nil
-	})
-
-	for _, tc := range []struct {
-		name         string
-		path         string
-		prefixHeader []string
-		expectStatus int
-		expectPath   string
-	}{
-		{
-			name:         "without prefix and header",
-			path:         "/hello/world",
-			expectStatus: 200,
-			expectPath:   "/hello/world",
-		},
-		{
-			name:         "without prefix and headers on root path",
-			path:         "/",
-			expectStatus: 200,
-			expectPath:   "/",
-		},
-		{
-			name:         "without prefix but header",
-			path:         "/hello/world",
-			prefixHeader: []string{"/otherprefix/"},
-			expectStatus: 200,
-			expectPath:   "/hello/world",
-		},
-		{
-			name:         "with prefix but non-matching header",
-			path:         "/prefix/hello/world",
-			prefixHeader: []string{"/otherprefix/"},
-			expectStatus: 404,
-		},
-		{
-			name:         "with prefix and matching header",
-			path:         "/myprefix/hello/world",
-			prefixHeader: []string{"/myprefix/"},
-			expectStatus: 200,
-			expectPath:   "/hello/world",
-		},
-		{
-			name:         "with prefix and 1st header matching",
-			path:         "/myprefix/hello/world",
-			prefixHeader: []string{"/myprefix/", "/otherprefix/"},
-			expectStatus: 200,
-			expectPath:   "/hello/world",
-		},
-		{
-			name:         "with prefix and 2nd header matching",
-			path:         "/myprefix/hello/world",
-			prefixHeader: []string{"/otherprefix/", "/myprefix/"},
-			expectStatus: 200,
-			expectPath:   "/hello/world",
-		},
-		{
-			name:         "with prefix and header not ending with slash",
-			path:         "/myprefix/hello/world",
-			prefixHeader: []string{"/myprefix"},
-			expectStatus: 200,
-			expectPath:   "/hello/world",
-		},
-		{
-			name:         "with prefix and non-matching header not ending with slash",
-			path:         "/myprefix-suffix/hello/world",
-			prefixHeader: []string{"/myprefix"},
-			expectStatus: 404,
-		},
-		{
-			name:         "redirect when prefix does not end with a slash",
-			path:         "/myprefix",
-			prefixHeader: []string{"/myprefix"},
-			expectStatus: 302,
-			expectPath:   "/myprefix/",
-		},
-	} {
-		t.Run(tc.name, func(t *testing.T) {
-			actualPath = ""
-			req := httptest.NewRequest("GET", tc.path, nil)
-			if tc.prefixHeader != nil {
-				req.Header["X-Forwarded-Prefix"] = tc.prefixHeader
-			}
-
-			resp, err := app.Test(req, -1)
-
-			require.NoError(t, err)
-			require.Equal(t, tc.expectStatus, resp.StatusCode, "response status code")
-
-			if tc.expectStatus == 200 {
-				require.Equal(t, tc.expectPath, actualPath, "rewritten path")
-			} else if tc.expectStatus == 302 {
-				require.Equal(t, tc.expectPath, resp.Header.Get("Location"), "redirect location")
-			}
-		})
-	}
-}
--- a/core/http/render.go
+++ b/core/http/render.go
@@ -10,7 +10,6 @@ import (
 	"github.com/gofiber/fiber/v2"
 	fiberhtml "github.com/gofiber/template/html/v2"
 	"github.com/microcosm-cc/bluemonday"
-	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/russross/blackfriday"
 )
@@ -27,9 +26,7 @@ func notFoundHandler(c *fiber.Ctx) error {
 		})
 	} else {
 		// The client expects an HTML response
-		return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{
-			"BaseURL": utils.BaseURL(c),
-		})
+		return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
 	}
 }

--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -11,62 +11,62 @@ import (
 	"github.com/mudler/LocalAI/pkg/model"
 )

-func RegisterLocalAIRoutes(router *fiber.App,
+func RegisterLocalAIRoutes(app *fiber.App,
 	cl *config.BackendConfigLoader,
 	ml *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
 	galleryService *services.GalleryService) {

-	router.Get("/swagger/*", swagger.HandlerDefault) // default
+	app.Get("/swagger/*", swagger.HandlerDefault) // default

 	// LocalAI API endpoints
 	if !appConfig.DisableGalleryEndpoint {
 		modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
-		router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
-		router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
+		app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
+		app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())

-		router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
-		router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
-		router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
-		router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
-		router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
-		router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
+		app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
+		app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
+		app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
+		app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
+		app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
+		app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
 	}

-	router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
-	router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
+	app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
+	app.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))

 	// Stores
 	sl := model.NewModelLoader("")
-	router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
-	router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
-	router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
-	router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
+	app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
+	app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
+	app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
+	app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))

 	if !appConfig.DisableMetrics {
-		router.Get("/metrics", localai.LocalAIMetricsEndpoint())
+		app.Get("/metrics", localai.LocalAIMetricsEndpoint())
 	}

 	// Experimental Backend Statistics Module
 	backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
-	router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
-	router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
+	app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
+	app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))

 	// p2p
 	if p2p.IsP2PEnabled() {
-		router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
-		router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
+		app.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
+		app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
 	}

-	router.Get("/version", func(c *fiber.Ctx) error {
+	app.Get("/version", func(c *fiber.Ctx) error {
 		return c.JSON(struct {
 			Version string `json:"version"`
 		}{Version: internal.PrintableVersion()})
 	})

-	router.Get("/system", localai.SystemInformations(ml, appConfig))
+	app.Get("/system", localai.SystemInformations(ml, appConfig))

 	// misc
-	router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
+	app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))

 }
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -2,137 +2,84 @@ package routes

 import (
 	"github.com/gofiber/fiber/v2"
-	"github.com/mudler/LocalAI/core/application"
+	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http/endpoints/localai"
 	"github.com/mudler/LocalAI/core/http/endpoints/openai"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 func RegisterOpenAIRoutes(app *fiber.App,
-	application *application.Application) {
+	cl *config.BackendConfigLoader,
+	ml *model.ModelLoader,
+	appConfig *config.ApplicationConfig) {
 	// openAI compatible API endpoint

-	// realtime
-	app.Get("/v1/realtime", openai.Realtime(application))
-
 	// chat
-	app.Post("/v1/chat/completions",
-		openai.ChatEndpoint(
-			application.BackendLoader(),
-			application.ModelLoader(),
-			application.TemplatesEvaluator(),
-			application.ApplicationConfig(),
-		),
-	)
-
-	app.Post("/chat/completions",
-		openai.ChatEndpoint(
-			application.BackendLoader(),
-			application.ModelLoader(),
-			application.TemplatesEvaluator(),
-			application.ApplicationConfig(),
-		),
-	)
+	app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
+	app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))

 	// edit
-	app.Post("/v1/edits",
-		openai.EditEndpoint(
-			application.BackendLoader(),
-			application.ModelLoader(),
-			application.TemplatesEvaluator(),
-			application.ApplicationConfig(),
-		),
-	)
-
-	app.Post("/edits",
-		openai.EditEndpoint(
-			application.BackendLoader(),
-			application.ModelLoader(),
-			application.TemplatesEvaluator(),
-			application.ApplicationConfig(),
-		),
-	)
+	app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig))
+	app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig))

 	// assistant
-	app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
+	app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
+	app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
+	app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
+	app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+	app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
+	app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
+	app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
+	app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+	app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
+	app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+	app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
+	app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+	app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
+	app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+	app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
+	app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
+	app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))

 	// files
-	app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
-	app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
+	app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig))
+	app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig))
+	app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig))
+	app.Get("/files", openai.ListFilesEndpoint(cl, appConfig))
+	app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
+	app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
+	app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
+	app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
+	app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
+	app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))

 	// completion
-	app.Post("/v1/completions",
-		openai.CompletionEndpoint(
-			application.BackendLoader(),
-			application.ModelLoader(),
-			application.TemplatesEvaluator(),
-			application.ApplicationConfig(),
-		),
-	)
-
-	app.Post("/completions",
-		openai.CompletionEndpoint(
-			application.BackendLoader(),
-			application.ModelLoader(),
-			application.TemplatesEvaluator(),
-			application.ApplicationConfig(),
-		),
-	)
-
-	app.Post("/v1/engines/:model/completions",
-		openai.CompletionEndpoint(
-			application.BackendLoader(),
-			application.ModelLoader(),
-			application.TemplatesEvaluator(),
-			application.ApplicationConfig(),
-		),
-	)
+	app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig))
+	app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig))
+	app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig))

 	// embeddings
-	app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
+	app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
+	app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))

 	// audio
-	app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
-	app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig))
+	app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig))

 	// images
-	app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
+	app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig))

-	if application.ApplicationConfig().ImageDir != "" {
-		app.Static("/generated-images", application.ApplicationConfig().ImageDir)
+	if appConfig.ImageDir != "" {
+		app.Static("/generated-images", appConfig.ImageDir)
 	}

-	if application.ApplicationConfig().AudioDir != "" {
-		app.Static("/generated-audio", application.ApplicationConfig().AudioDir)
+	if appConfig.AudioDir != "" {
+		app.Static("/generated-audio", appConfig.AudioDir)
 	}

 	// List models
-	app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
-	app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
+	app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml))
+	app.Get("/models", openai.ListModelsEndpoint(cl, ml))
 }
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -6,21 +6,20 @@ import (
 	"sort"
 	"strings"

+	"github.com/microcosm-cc/bluemonday"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/http/elements"
 	"github.com/mudler/LocalAI/core/http/endpoints/localai"
-	"github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/xsync"
+	"github.com/rs/zerolog/log"

 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
-	"github.com/microcosm-cc/bluemonday"
-	"github.com/rs/zerolog/log"
 )

 type modelOpCache struct {
@@ -92,7 +91,6 @@ func RegisterUIRoutes(app *fiber.App,
 		app.Get("/p2p", func(c *fiber.Ctx) error {
 			summary := fiber.Map{
 				"Title":   "LocalAI - P2P dashboard",
-				"BaseURL": utils.BaseURL(c),
 				"Version": internal.PrintableVersion(),
 				//"Nodes":          p2p.GetAvailableNodes(""),
 				//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
@@ -151,7 +149,6 @@ func RegisterUIRoutes(app *fiber.App,

 			summary := fiber.Map{
 				"Title":            "LocalAI - Models",
-				"BaseURL":          utils.BaseURL(c),
 				"Version":          internal.PrintableVersion(),
 				"Models":           template.HTML(elements.ListModels(models, processingModels, galleryService)),
 				"Repositories":     appConfig.Galleries,
@@ -311,7 +308,6 @@ func RegisterUIRoutes(app *fiber.App,

 		summary := fiber.Map{
 			"Title":        "LocalAI - Chat with " + c.Params("model"),
-			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -327,12 +323,11 @@ func RegisterUIRoutes(app *fiber.App,

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect(utils.BaseURL(c))
+			return c.Redirect("/")
 		}

 		summary := fiber.Map{
 			"Title":        "LocalAI - Talk",
-			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0],
 			"IsP2PEnabled": p2p.IsP2PEnabled(),
@@ -349,12 +344,11 @@ func RegisterUIRoutes(app *fiber.App,

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect(utils.BaseURL(c))
+			return c.Redirect("/")
 		}

 		summary := fiber.Map{
 			"Title":        "LocalAI - Chat with " + backendConfigs[0],
-			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0],
 			"Version":      internal.PrintableVersion(),
@@ -370,7 +364,6 @@ func RegisterUIRoutes(app *fiber.App,

 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + c.Params("model"),
-			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -387,12 +380,11 @@ func RegisterUIRoutes(app *fiber.App,

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect(utils.BaseURL(c))
+			return c.Redirect("/")
 		}

 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + backendConfigs[0].Name,
-			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0].Name,
 			"Version":      internal.PrintableVersion(),
@@ -408,7 +400,6 @@ func RegisterUIRoutes(app *fiber.App,

 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate images with " + c.Params("model"),
-			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        c.Params("model"),
 			"Version":      internal.PrintableVersion(),
@@ -425,12 +416,11 @@ func RegisterUIRoutes(app *fiber.App,

 		if len(backendConfigs) == 0 {
 			// If no model is available redirect to the index which suggests how to install models
-			return c.Redirect(utils.BaseURL(c))
+			return c.Redirect("/")
 		}

 		summary := fiber.Map{
 			"Title":        "LocalAI - Generate audio with " + backendConfigs[0].Name,
-			"BaseURL":      utils.BaseURL(c),
 			"ModelsConfig": backendConfigs,
 			"Model":        backendConfigs[0].Name,
 			"IsP2PEnabled": p2p.IsP2PEnabled(),
--- a/core/http/static/assets/font1.css
+++ b/core/http/static/assets/font1.css
@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wg
  font-style: normal;
  font-weight: 400;
  font-display: swap;
-  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
+  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
 }
@font-face {
  font-family: 'Inter';
  font-style: normal;
  font-weight: 600;
  font-display: swap;
-  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
+  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
 }
@font-face {
  font-family: 'Inter';
  font-style: normal;
  font-weight: 700;
  font-display: swap;
-  src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
+  src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 400;
  font-display: swap;
-  src: url(./KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
+  src: url(/static/assets/KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 500;
  font-display: swap;
-  src: url(./KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
+  src: url(/static/assets/KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
 }
--- a/core/http/static/assets/font2.css
+++ b/core/http/static/assets/font2.css
@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap
  font-style: normal;
  font-weight: 300;
  font-display: swap;
-  src: url(./KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
+  src: url(/static/assets/KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 400;
  font-display: swap;
-  src: url(./KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
+  src: url(/static/assets/KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 500;
  font-display: swap;
-  src: url(./KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
+  src: url(/static/assets/KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 700;
  font-display: swap;
-  src: url(./KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
+  src: url(/static/assets/KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
 }
@font-face {
  font-family: 'Roboto';
  font-style: normal;
  font-weight: 900;
  font-display: swap;
-  src: url(./KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
+  src: url(/static/assets/KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
 }
--- a/core/http/static/chat.js
+++ b/core/http/static/chat.js
@@ -143,7 +143,7 @@ function readInputImage() {
    // }

    // Source: https://stackoverflow.com/a/75751803/11386095
-    const response = await fetch("v1/chat/completions", {
+    const response = await fetch("/v1/chat/completions", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${key}`,
--- a/core/http/static/image.js
+++ b/core/http/static/image.js
@@ -48,7 +48,7 @@ async function promptDallE(key, input) {
  document.getElementById("input").disabled = true;

  const model = document.getElementById("image-model").value;
-  const response = await fetch("v1/images/generations", {
+  const response = await fetch("/v1/images/generations", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${key}`,
--- a/core/http/static/talk.js
+++ b/core/http/static/talk.js
@@ -122,7 +122,7 @@ async function sendAudioToWhisper(audioBlob) {
    formData.append('model', getWhisperModel());
    API_KEY = localStorage.getItem("key");

-    const response = await fetch('v1/audio/transcriptions', {
+    const response = await fetch('/v1/audio/transcriptions', {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${API_KEY}`
@@ -139,7 +139,7 @@ async function sendTextToChatGPT(text) {
    conversationHistory.push({ role: "user", content: text });
    API_KEY = localStorage.getItem("key");

-    const response = await fetch('v1/chat/completions', {
+    const response = await fetch('/v1/chat/completions', {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${API_KEY}`,
@@ -163,7 +163,7 @@ async function sendTextToChatGPT(text) {
 async function getTextToSpeechAudio(text) {
    API_KEY = localStorage.getItem("key");

-    const response = await fetch('v1/audio/speech', {
+    const response = await fetch('/v1/audio/speech', {
        
        method: 'POST',
        headers: {
--- a/core/http/static/tts.js
+++ b/core/http/static/tts.js
@@ -19,7 +19,7 @@ async function tts(key, input) {
  document.getElementById("input").disabled = true;

  const model = document.getElementById("tts-model").value;
-  const response = await fetch("tts", {
+  const response = await fetch("/tts", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${key}`,
--- a/core/http/utils/baseurl.go
+++ b/core/http/utils/baseurl.go
@@ -1,24 +0,0 @@
-package utils
-
-import (
-	"strings"
-
-	"github.com/gofiber/fiber/v2"
-)
-
-// BaseURL returns the base URL for the given HTTP request context.
-// It takes into account that the app may be exposed by a reverse-proxy under a different protocol, host and path.
-// The returned URL is guaranteed to end with `/`.
-// The method should be used in conjunction with the StripPathPrefix middleware.
-func BaseURL(c *fiber.Ctx) string {
-	path := c.Path()
-	origPath := c.OriginalURL()
-
-	if path != origPath && strings.HasSuffix(origPath, path) {
-		pathPrefix := origPath[:len(origPath)-len(path)+1]
-
-		return c.BaseURL() + pathPrefix
-	}
-
-	return c.BaseURL() + "/"
-}
--- a/core/http/utils/baseurl_test.go
+++ b/core/http/utils/baseurl_test.go
@@ -1,48 +0,0 @@
-package utils
-
-import (
-	"net/http/httptest"
-	"testing"
-
-	"github.com/gofiber/fiber/v2"
-	"github.com/stretchr/testify/require"
-)
-
-func TestBaseURL(t *testing.T) {
-	for _, tc := range []struct {
-		name      string
-		prefix    string
-		expectURL string
-	}{
-		{
-			name:      "without prefix",
-			prefix:    "/",
-			expectURL: "http://example.com/",
-		},
-		{
-			name:      "with prefix",
-			prefix:    "/myprefix/",
-			expectURL: "http://example.com/myprefix/",
-		},
-	} {
-		t.Run(tc.name, func(t *testing.T) {
-			app := fiber.New()
-			actualURL := ""
-
-			app.Get(tc.prefix+"hello/world", func(c *fiber.Ctx) error {
-				if tc.prefix != "/" {
-					c.Path("/hello/world")
-				}
-				actualURL = BaseURL(c)
-				return nil
-			})
-
-			req := httptest.NewRequest("GET", tc.prefix+"hello/world", nil)
-			resp, err := app.Test(req, -1)
-
-			require.NoError(t, err)
-			require.Equal(t, 200, resp.StatusCode, "response status code")
-			require.Equal(t, tc.expectURL, actualURL, "base URL")
-		})
-	}
-}
--- a/core/http/views/404.html
+++ b/core/http/views/404.html
@@ -12,7 +12,7 @@
        <div class="header text-center py-12">
            <h1 class="text-5xl font-bold">Welcome to your LocalAI instance!</h1>
            <div class="mt-6">
-         <!--       <a href="./" aria-label="HomePage" alt="HomePage">
+         <!--       <a href="/" aria-label="HomePage" alt="HomePage">           
                    <img class="mx-auto w-1/4 h-auto" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo">            
                </a>
            -->
--- a/core/http/views/chat.html
+++ b/core/http/views/chat.html
@@ -28,7 +28,7 @@ SOFTWARE.
 <!doctype html>
 <html lang="en">
  {{template "views/partials/head" .}}
-  <script defer src="static/chat.js"></script>
+  <script defer src="/static/chat.js"></script>
  <style>
    body {
        overflow: hidden; 
@@ -101,9 +101,9 @@ SOFTWARE.
        {{ $model:=.Model}}
        {{ range .ModelsConfig }}
        {{ if eq . $model }}
-        <option value="chat/{{.}}" selected  class="bg-gray-700 text-white">{{.}}</option>
+        <option value="/chat/{{.}}" selected  class="bg-gray-700 text-white">{{.}}</option>
        {{ else }}
-        <option value="chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
+        <option value="/chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
        {{ end }}
        {{ end }}
      </select>
@@ -142,7 +142,7 @@ SOFTWARE.
      <div id="loader" class="my-2 loader" style="display: none;"></div>
      <input id="chat-model" type="hidden" value="{{.Model}}">
      <input id="input_image" type="file" style="display: none;" @change="fileName = $event.target.files[0].name">
-      <form id="prompt" action="chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
+      <form id="prompt" action="/chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
          <div class="relative w-full">
              <textarea
                  id="input"
--- a/core/http/views/explorer.html
+++ b/core/http/views/explorer.html
@@ -370,7 +370,7 @@
                }
            }
        </script>
-        <script src="static/p2panimation.js"></script>
+        <script src="/static/p2panimation.js"></script>

        {{template "views/partials/footer" .}}
    </div>
--- a/core/http/views/index.html
+++ b/core/http/views/index.html
@@ -20,7 +20,7 @@
            {{template "views/partials/inprogress" .}}
            {{ if eq (len .ModelsConfig) 0 }}
            <h2 class="text-center text-3xl font-semibold text-gray-100"> <i class="text-yellow-200 ml-2 fa-solid fa-triangle-exclamation animate-pulse"></i> Ouch! seems you don't have any models installed from the LocalAI gallery!</h2>
-            <p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>
+            <p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="/browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>

            {{ if ne (len .Models) 0 }}
            <hr class="my-4">
@@ -66,7 +66,7 @@
                        {{ end }}
                    </td>
                    <td class="px-4 py-3 font-bold">
-                        <p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="browse?term={{.Name}}">{{.Name}}</a></p>
+                        <p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="/browse?term={{.Name}}">{{.Name}}</a></p>
                    </td>
                    <td class="px-4 py-3 font-bold">
                        {{ if .Backend }}
@@ -84,7 +84,7 @@
                    <td class="px-4 py-3">
                        <button
                            class="float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
-                            data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
+                            data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="/browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
                    </td>
                {{ end }}
                {{ range .Models }}
--- a/core/http/views/login.html
+++ b/core/http/views/login.html
@@ -4,8 +4,6 @@
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Open Authenticated Website</title>
-    <base href="{{.BaseURL}}" />
-    <link rel="icon" type="image/x-icon" href="favicon.ico" />
 </head>
 <body>
    <h1>Authorization is required</h1>
--- a/core/http/views/models.html
+++ b/core/http/views/models.html
@@ -16,38 +16,38 @@

            <div class="text-center font-semibold text-gray-100">
                <h2>Filter by type:</h2>
-                <button  hx-post="browse/search/models"
+                <button  hx-post="/browse/search/models"
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "tts"}'
                hx-indicator=".htmx-indicator" >TTS</button> 
-                <button  hx-post="browse/search/models" 
+                <button  hx-post="/browse/search/models" 
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "stablediffusion"}'
                hx-indicator=".htmx-indicator" >Image generation</button> 
-                <button  hx-post="browse/search/models" \
+                <button  hx-post="/browse/search/models"
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "llm"}'
                hx-indicator=".htmx-indicator" >Text generation</button> 
-                <button  hx-post="browse/search/models" 
+                <button  hx-post="/browse/search/models" 
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "multimodal"}'
                hx-indicator=".htmx-indicator" >Multimodal</button> 
-                <button  hx-post="browse/search/models" 
+                <button  hx-post="/browse/search/models" 
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "embedding"}'
                hx-indicator=".htmx-indicator" >Embeddings</button>
-                <button  hx-post="browse/search/models"
+                <button  hx-post="/browse/search/models"
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "rerank"}'
                hx-indicator=".htmx-indicator" >Rerankers</button> 
                <button  
-                    hx-post="browse/search/models"
+                    hx-post="/browse/search/models"
                    class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                    hx-target="#search-results" 
                    hx-vals='{"search": "whisper"}'
@@ -57,7 +57,7 @@
            <div class="text-center text-xs font-semibold text-gray-100">
                Filter by tags:
                {{ range .AllTags }}
-                    <button  hx-post="browse/search/models" class="text-blue-500" hx-target="#search-results" 
+                    <button  hx-post="/browse/search/models" class="text-blue-500" hx-target="#search-results" 
                    hx-vals='{"search": "{{.}}"}'
                    hx-indicator=".htmx-indicator" >{{.}}</button> 
                {{ end }}
@@ -69,7 +69,7 @@

            <input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search" 
                name="search" placeholder="Begin Typing To Search models..." 
-                hx-post="browse/search/models" 
+                hx-post="/browse/search/models" 
                hx-trigger="input changed delay:500ms, search" 
                hx-target="#search-results" 
                hx-indicator=".htmx-indicator">
--- a/core/http/views/p2p.html
+++ b/core/http/views/p2p.html
@@ -48,11 +48,11 @@
            <!-- Federation Box -->
            <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">

-                <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p>
+                <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="/p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p>
                <p class="mb-4">You can start LocalAI in federated mode to share your instance, or start the federated server to balance requests between nodes of the federation.</p>

                <div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
-                    <div hx-get="p2p/ui/workers-federation" hx-trigger="every 1s"></div>
+                    <div hx-get="/p2p/ui/workers-federation" hx-trigger="every 1s"></div>
                </div>

                <hr class="border-gray-700 mb-12">
@@ -123,11 +123,11 @@

            <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">

-                <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p>
+                <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="/p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p>
                <p class="mb-4">You can start llama.cpp workers to distribute weights between the workers and offload part of the computation. To start a new worker, you can use the CLI or Docker.</p>

                <div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
-                    <div hx-get="p2p/ui/workers" hx-trigger="every 1s"></div>
+                    <div hx-get="/p2p/ui/workers" hx-trigger="every 1s"></div>
                </div>
                <hr class="border-gray-700 mb-12">

@@ -177,7 +177,7 @@

    {{template "views/partials/footer" .}}
 </div>
-<script src="static/p2panimation.js"></script>
+<script src="/static/p2panimation.js"></script>
 <style>
    .token {
        word-break: break-all;
--- a/core/http/views/partials/footer.html
+++ b/core/http/views/partials/footer.html
@@ -2,4 +2,4 @@
    LocalAI Version {{.Version}}<br>
    <a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
 </footer>
-<script src="static/assets/tw-elements.js"></script>
+<script src="/static/assets/tw-elements.js"></script>
--- a/core/http/views/partials/head.html
+++ b/core/http/views/partials/head.html
@@ -2,35 +2,33 @@
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{.Title}}</title>
-    <base href="{{.BaseURL}}" />
-    <link rel="icon" type="image/x-icon" href="favicon.ico" />
    <link
    rel="stylesheet"
-    href="static/assets/highlightjs.css"
+    href="/static/assets/highlightjs.css"
  />
-  <script defer src="static/assets/highlightjs.js"></script>
+  <script defer src="/static/assets/highlightjs.js"></script>
    <script
    defer
-    src="static/assets/alpine.js"
+    src="/static/assets/alpine.js"
  ></script>
  <script
    defer
-    src="static/assets/marked.js"
+    src="/static/assets/marked.js"
  ></script>
  <script
    defer
-    src="static/assets/purify.js"
+    src="/static/assets/purify.js"
  ></script>

-  <link href="static/general.css" rel="stylesheet" />
-    <link href="static/assets/font1.css" rel="stylesheet">
+  <link href="/static/general.css" rel="stylesheet" />
+    <link href="/static/assets/font1.css" rel="stylesheet">
    <link
-    href="static/assets/font2.css"
+    href="/static/assets/font2.css"
    rel="stylesheet" />
  <link
    rel="stylesheet"
-    href="static/assets/tw-elements.css" />
-  <script src="static/assets/tailwindcss.js"></script>
+    href="/static/assets/tw-elements.css" />
+  <script src="/static/assets/tailwindcss.js"></script>
  <script>
    tailwind.config = {
      darkMode: "class",
@@ -56,11 +54,11 @@
      });
    }
  </script>
-  <link href="static/assets/fontawesome/css/fontawesome.css" rel="stylesheet" />
-  <link href="static/assets/fontawesome/css/brands.css" rel="stylesheet" />
-  <link href="static/assets/fontawesome/css/solid.css" rel="stylesheet" />
-  <script src="static/assets/flowbite.min.js"></script>
-  <script src="static/assets/htmx.js" crossorigin="anonymous"></script>
+  <link href="/static/assets/fontawesome/css/fontawesome.css" rel="stylesheet" />
+  <link href="/static/assets/fontawesome/css/brands.css" rel="stylesheet" />
+  <link href="/static/assets/fontawesome/css/solid.css" rel="stylesheet" />
+  <script src="/static/assets/flowbite.min.js"></script>
+  <script src="/static/assets/htmx.js" crossorigin="anonymous"></script>
  <!-- P2P Animation START -->
  <style>
    .animation-container {
--- a/core/http/views/partials/inprogress.html
+++ b/core/http/views/partials/inprogress.html
@@ -17,13 +17,13 @@

      <div class="flex items-center justify-between bg-slate-600 p-2 mb-2 rounded-md">
         <div class="flex items center">
-             <span class="text-gray-300"><a href="browse?term={{$parts._1}}"
+             <span class="text-gray-300"><a href="/browse?term={{$parts._1}}"
                 class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
                 >{{$modelName}}</a> {{if $repository}} (from the '{{$repository}}' repository) {{end}}</span>
         </div>
-         <div hx-get="browse/job/{{$value}}" hx-swap="outerHTML" hx-target="this" hx-trigger="done">
+         <div hx-get="/browse/job/{{$value}}" hx-swap="outerHTML" hx-target="this" hx-trigger="done">
             <h3 role="status" id="pblabel" >{{$op}}
-                 <div hx-get="browse/job/progress/{{$value}}" hx-trigger="every 600ms" 
+                 <div hx-get="/browse/job/progress/{{$value}}" hx-trigger="every 600ms" 
                 hx-target="this"
                 hx-swap="innerHTML"  ></div></h3>
         </div>     
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -3,8 +3,8 @@
        <div class="flex items-center justify-between">
            <div class="flex items-center">
                <!-- Logo Image: Replace 'logo_url_here' with your actual logo URL -->
-                <a href="./" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
-                <a href="./" class="text-white text-xl font-bold">LocalAI</a>
+                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
+                <a href="/" class="text-white text-xl font-bold">LocalAI</a>
            </div>
            <!-- Menu button for small screens -->
            <div class="lg:hidden">
@@ -14,33 +14,33 @@
            </div>
            <!-- Navigation links -->
            <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
-                <a href="./" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
+                <a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
                <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-                <a href="browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
-                <a href="chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
-                <a href="text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
-                <a href="tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
-                <a href="talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
+                <a href="/browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
+                <a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
+                <a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
+                <a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
+                <a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
                {{ if .IsP2PEnabled }}
-                <a href="p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
+                <a href="/p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
                {{ end }}
-                <a href="swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
+                <a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
            </div>
        </div>
        <!-- Collapsible menu for small screens -->
        <div class="hidden lg:hidden" id="mobile-menu">
            <div class="pt-4 pb-3 border-t border-gray-700">
-                <a href="./" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
+                <a href="/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
                <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
-                <a href="browse/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-brain pr-2"></i> Models</a>
-                <a href="chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
-                <a href="text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
-                <a href="tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
-                <a href="talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
+                <a href="/browse/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-brain pr-2"></i> Models</a>
+                <a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
+                <a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
+                <a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
+                <a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
                {{ if .IsP2PEnabled }}
-                <a href="p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
+                <a href="/p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
                {{ end }}
-                <a href="swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
+                <a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
            </div>
        </div>
    </div>
--- a/core/http/views/partials/navbar_explorer.html
+++ b/core/http/views/partials/navbar_explorer.html
@@ -3,8 +3,8 @@
        <div class="flex items-center justify-between">
            <div class="flex items-center">
                <!-- Logo Image: Replace 'logo_url_here' with your actual logo URL -->
-                <a href="./" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
-                <a href="./" class="text-white text-xl font-bold">LocalAI</a>
+                <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
+                <a href="/" class="text-white text-xl font-bold">LocalAI</a>
            </div>
            <!-- Menu button for small screens -->
            <div class="lg:hidden">
@@ -14,7 +14,7 @@
            </div>
            <!-- Navigation links -->
            <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
-                <a href="./" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
+                <a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
                <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
                <a href="https://models.localai.io/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
            </div>
@@ -22,7 +22,7 @@
        <!-- Collapsible menu for small screens -->
        <div class="hidden lg:hidden" id="mobile-menu">
            <div class="pt-4 pb-3 border-t border-gray-700">
-                <a href="./" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
+                <a href="/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
                <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
                <a href="https://models.localai.io/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
            </div>
--- a/core/http/views/talk.html
+++ b/core/http/views/talk.html
@@ -1,7 +1,7 @@
 <!doctype html>
 <html lang="en">
  {{template "views/partials/head" .}}
-  <script defer src="static/talk.js"></script>
+  <script defer src="/static/talk.js"></script>
  <style>
    body {
        overflow: hidden; 
--- a/core/http/views/text2image.html
+++ b/core/http/views/text2image.html
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html lang="en">
 {{template "views/partials/head" .}}
-<script defer src="static/image.js"></script>
+<script defer src="/static/image.js"></script>

 <body class="bg-gray-900 text-gray-200">
 <div class="flex flex-col min-h-screen">
@@ -50,9 +50,9 @@
                {{ $model:=.Model}}
                {{ range .ModelsConfig }}
                {{ if eq .Name $model }}
-                <option value="text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
+                <option value="/text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
                {{ else }}
-                <option value="text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
+                <option value="/text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
                {{ end }}
                {{ end }}
              </select>
@@ -62,7 +62,7 @@

            <div class="mt-12">
              <input id="image-model" type="hidden" value="{{.Model}}">
-              <form id="genimage" action="text2image/{{.Model}}" method="get">
+              <form id="genimage" action="/text2image/{{.Model}}" method="get">
                <input
                  type="text"
                  id="input"
--- a/Show More
+++ b/Show More