Compare commits


1 commit

Author:  Ettore Di Giacinto
SHA1:    3e8a54f4b6
Message: chore(docs): improve
         Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Date:    2025-11-17 19:34:25 +01:00
173 changed files with 5946 additions and 9062 deletions

View File

@@ -1090,7 +1090,7 @@ jobs:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
@@ -1176,7 +1176,7 @@ jobs:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}

View File

@@ -97,7 +97,7 @@ jobs:
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'

View File

@@ -50,7 +50,7 @@ jobs:
go-version: ['${{ inputs.go-version }}']
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true

View File

@@ -17,7 +17,7 @@ jobs:
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Bun
uses: oven-sh/setup-bun@v2

View File

@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
@@ -25,7 +25,7 @@ jobs:
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
@@ -47,7 +47,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go

View File

@@ -31,7 +31,7 @@ jobs:
file: "backend/go/piper/Makefile"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Bump dependencies 🔧
id: bump
run: |

View File

@@ -12,7 +12,7 @@ jobs:
- repository: "mudler/LocalAI"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Bump dependencies 🔧
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}

View File

@@ -15,7 +15,7 @@ jobs:
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install dependencies
run: |
sudo apt-get update

View File

@@ -20,7 +20,7 @@ jobs:
skip-commit-verification: true
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Approve a PR if not already approved
run: |

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- uses: actions/setup-go@v5

View File

@@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -73,7 +73,7 @@ jobs:
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Cache GRPC
uses: docker/build-push-action@v6

View File

@@ -16,7 +16,7 @@ jobs:
matrix:
include:
- base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
steps:
@@ -43,7 +43,7 @@ jobs:
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Cache Intel images
uses: docker/build-push-action@v6

View File

@@ -94,7 +94,7 @@ jobs:
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'

View File

@@ -14,7 +14,7 @@ jobs:
if: ${{ github.actor == 'localai-bot' && !contains(github.event.pull_request.title, 'chore(model gallery):') }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Approve a PR if not already approved
run: |

View File

@@ -15,7 +15,7 @@ jobs:
MODEL_NAME: gemma-3-12b-it-qat
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes
@@ -95,7 +95,7 @@ jobs:
MODEL_NAME: gemma-3-12b-it-qat
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes

View File

@@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
@@ -28,7 +28,7 @@ jobs:
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
@@ -46,7 +46,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go

View File

@@ -14,7 +14,7 @@ jobs:
GO111MODULE: on
steps:
- name: Checkout Source
uses: actions/checkout@v6
uses: actions/checkout@v5
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}

View File

@@ -19,7 +19,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -40,7 +40,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies
@@ -61,7 +61,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies
@@ -83,7 +83,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies
@@ -104,7 +104,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -124,7 +124,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -186,7 +186,7 @@ jobs:
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
# df -h
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -211,7 +211,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -232,7 +232,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies

View File

@@ -70,7 +70,7 @@ jobs:
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
@@ -166,7 +166,7 @@ jobs:
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies
@@ -196,7 +196,7 @@ jobs:
go-version: ['1.25.x']
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}

View File

@@ -9,7 +9,7 @@ jobs:
fail-fast: false
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- uses: actions/setup-go@v5
with:
go-version: 'stable'

.gitmodules (vendored)
View File

@@ -1,3 +1,6 @@
[submodule "docs/themes/hugo-theme-relearn"]
path = docs/themes/hugo-theme-relearn
url = https://github.com/McShelby/hugo-theme-relearn.git
[submodule "docs/themes/lotusdocs"]
path = docs/themes/lotusdocs
url = https://github.com/colinwilson/lotusdocs

View File

@@ -332,6 +332,6 @@ RUN mkdir -p /models /backends
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
VOLUME /models /backends /configuration
VOLUME /models /backends
EXPOSE 8080
ENTRYPOINT [ "/entrypoint.sh" ]

View File

@@ -108,7 +108,7 @@ Run the installer script:
curl https://localai.io/install.sh | sh
```
For more installation options, see [Installer Options](https://localai.io/installation/).
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
### macOS Download:
@@ -206,7 +206,6 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti
## 📰 Latest project news
- November 2025: Major improvements to the UX. Among these: [Import models via URL](https://github.com/mudler/LocalAI/pull/7245) and [Multiple chats and history](https://github.com/mudler/LocalAI/pull/7325)
- October 2025: 🔌 [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/) support added for agentic capabilities with external tools
- September 2025: New Launcher application for MacOS and Linux, extended support to many backends for Mac and Nvidia L4T devices. Models: Added MLX-Audio, WAN 2.2. WebUI improvements and Python-based backends now ships portable python environments.
- August 2025: MLX, MLX-VLM, Diffusers and llama.cpp are now supported on Mac M1/M2/M3+ chips ( with `development` suffix in the gallery ): https://github.com/mudler/LocalAI/pull/6049 https://github.com/mudler/LocalAI/pull/6119 https://github.com/mudler/LocalAI/pull/6121 https://github.com/mudler/LocalAI/pull/6060

View File

@@ -57,7 +57,7 @@ add_library(hw_grpc_proto
${hw_proto_srcs}
${hw_proto_hdrs} )
add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h)
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h)
target_include_directories(${TARGET} PRIVATE ../llava)
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=583cb83416467e8abf9b37349dcf1f6a0083745a
LLAMA_VERSION?=80deff3648b93727422461c41c7279ef1dac7452
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -8,10 +8,6 @@
//
#include "server.cpp"
#include "server-task.cpp"
#include "server-queue.cpp"
#include "server-common.cpp"
// LocalAI
#include "backend.pb.h"
@@ -141,43 +137,15 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
}
// Extract tools and tool_choice from proto and add to data JSON
SRV_INF("[TOOLS DEBUG] parse_options: Checking for tools in proto, tools().empty()=%d, tools().size()=%zu\n",
predict->tools().empty() ? 1 : 0, predict->tools().size());
if (!predict->tools().empty()) {
SRV_INF("[TOOLS DEBUG] parse_options: Tools string from proto (first 500 chars): %s\n",
predict->tools().substr(0, std::min<size_t>(500, predict->tools().size())).c_str());
try {
// Parse tools JSON string and add to data
json tools_json = json::parse(predict->tools());
data["tools"] = tools_json;
SRV_INF("Extracted tools from proto: %s\n", predict->tools().c_str());
// Debug: Log tools count and names
if (tools_json.is_array()) {
SRV_INF("[TOOLS DEBUG] parse_options: Successfully parsed %zu tools from Go layer\n", tools_json.size());
for (size_t i = 0; i < tools_json.size(); i++) {
if (tools_json[i].contains("function") && tools_json[i]["function"].contains("name")) {
SRV_INF("[TOOLS DEBUG] parse_options: Tool %zu: %s\n", i, tools_json[i]["function"]["name"].get<std::string>().c_str());
} else if (tools_json[i].contains("name")) {
SRV_INF("[TOOLS DEBUG] parse_options: Tool %zu: %s\n", i, tools_json[i]["name"].get<std::string>().c_str());
}
}
} else {
SRV_WRN("[TOOLS DEBUG] parse_options: Parsed tools JSON is not an array: %s\n", tools_json.dump().c_str());
}
} catch (const json::parse_error& e) {
SRV_WRN("Failed to parse tools JSON from proto: %s\n", e.what());
SRV_WRN("[TOOLS DEBUG] parse_options: Tools string that failed to parse: %s\n", predict->tools().c_str());
}
} else {
SRV_INF("%s", "[TOOLS DEBUG] parse_options: No tools received from Go layer (predict->tools() is empty)\n");
}
// Debug: Verify tools are in data after extraction
if (data.contains("tools")) {
SRV_INF("[TOOLS DEBUG] parse_options: Tools successfully added to data, count: %zu\n",
data["tools"].is_array() ? data["tools"].size() : 0);
} else {
SRV_INF("%s", "[TOOLS DEBUG] parse_options: WARNING - Tools NOT in data after extraction!\n");
}
if (!predict->toolchoice().empty()) {
try {
@@ -187,11 +155,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
// Store it as-is (string or object) so we can convert object to "required" later when adding to body_json
if (tool_choice_json.is_string()) {
data["tool_choice"] = tool_choice_json.get<std::string>();
SRV_DBG("[TOOLS DEBUG] Received tool_choice from Go layer: %s\n", tool_choice_json.get<std::string>().c_str());
} else {
// Store object as-is so we can detect it later and convert to "required"
data["tool_choice"] = tool_choice_json;
SRV_DBG("[TOOLS DEBUG] Received tool_choice object from Go layer: %s\n", tool_choice_json.dump().c_str());
}
SRV_INF("Extracted tool_choice from proto: %s\n", predict->toolchoice().c_str());
} catch (const json::parse_error& e) {
@@ -700,10 +666,6 @@ public:
json content_val;
try {
content_val = json::parse(msg.content());
// Handle null values - convert to empty string to avoid template errors
if (content_val.is_null()) {
content_val = "";
}
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
content_val = msg.content();
@@ -745,12 +707,7 @@ public:
msg_json["content"] = content_array;
} else {
// Use content as-is (already array or not last user message)
// Ensure null values are converted to empty string
if (content_val.is_null()) {
msg_json["content"] = "";
} else {
msg_json["content"] = content_val;
}
msg_json["content"] = content_val;
}
} else if (is_last_user_msg && has_images_or_audio) {
// If no content but this is the last user message with images/audio, create content array
@@ -777,57 +734,6 @@ public:
}
}
msg_json["content"] = content_array;
} else if (msg.role() == "tool") {
// Tool role messages must have content field set, even if empty
// Jinja templates expect content to be a string, not null or object
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
if (msg.content().empty()) {
msg_json["content"] = "";
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): empty content, set to empty string\n", i);
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): content exists: %s\n",
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
// Content exists, parse and ensure it's a string
json content_val;
try {
content_val = json::parse(msg.content());
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): parsed JSON, type=%s\n",
i, content_val.is_null() ? "null" :
content_val.is_object() ? "object" :
content_val.is_string() ? "string" :
content_val.is_array() ? "array" : "other");
// Handle null values - Jinja templates expect content to be a string, not null
if (content_val.is_null()) {
msg_json["content"] = "";
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): null content, converted to empty string\n", i);
} else if (content_val.is_object()) {
// If content is an object (e.g., from tool call failures/errors), convert to string
msg_json["content"] = content_val.dump();
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): object content, converted to string: %s\n",
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
} else if (content_val.is_string()) {
msg_json["content"] = content_val.get<std::string>();
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): string content, using as-is\n", i);
} else {
// For arrays or other types, convert to string
msg_json["content"] = content_val.dump();
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): %s content, converted to string\n",
i, content_val.is_array() ? "array" : "other type");
}
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
msg_json["content"] = msg.content();
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): not JSON, using as string\n", i);
}
}
} else {
// Ensure all messages have content set (fallback for any unhandled cases)
// Jinja templates expect content to be present, default to empty string if not set
if (!msg_json.contains("content")) {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (role=%s): no content field, adding empty string\n",
i, msg.role().c_str());
msg_json["content"] = "";
}
}
// Add optional fields for OpenAI-compatible message format
@@ -845,96 +751,14 @@ public:
try {
json tool_calls = json::parse(msg.tool_calls());
msg_json["tool_calls"] = tool_calls;
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
// IMPORTANT: If message has tool_calls but content is empty or not set,
// set content to space " " instead of empty string "", because llama.cpp's
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
// which causes template errors when accessing message.content[:tool_start_length]
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d has tool_calls but empty content, setting to space\n", i);
msg_json["content"] = " ";
}
// Log each tool call with name and arguments
if (tool_calls.is_array()) {
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
const auto& tc = tool_calls[tc_idx];
std::string tool_name = "unknown";
std::string tool_args = "{}";
if (tc.contains("function")) {
const auto& func = tc["function"];
if (func.contains("name")) {
tool_name = func["name"].get<std::string>();
}
if (func.contains("arguments")) {
tool_args = func["arguments"].is_string() ?
func["arguments"].get<std::string>() :
func["arguments"].dump();
}
} else if (tc.contains("name")) {
tool_name = tc["name"].get<std::string>();
if (tc.contains("arguments")) {
tool_args = tc["arguments"].is_string() ?
tc["arguments"].get<std::string>() :
tc["arguments"].dump();
}
}
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d, tool_call %zu: name=%s, arguments=%s\n",
i, tc_idx, tool_name.c_str(), tool_args.c_str());
}
}
} catch (const json::parse_error& e) {
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
}
}
// Debug: Log final content state before adding to array
if (msg_json.contains("content")) {
if (msg_json["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content type=%s, has_value=%d\n",
i, msg_json["content"].is_string() ? "string" :
msg_json["content"].is_array() ? "array" :
msg_json["content"].is_object() ? "object" : "other",
msg_json["content"].is_null() ? 0 : 1);
}
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
}
messages_json.push_back(msg_json);
}
// Final safety check: Ensure no message has null content (Jinja templates require strings)
SRV_INF("[CONTENT DEBUG] PredictStream: Running final safety check on %zu messages\n", messages_json.size());
for (size_t idx = 0; idx < messages_json.size(); idx++) {
auto& msg = messages_json[idx];
if (msg.contains("content") && msg["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check found message %zu with NULL content, converting to empty string\n", idx);
msg["content"] = "";
} else if (!msg.contains("content")) {
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check found message %zu without content field, adding empty string\n", idx);
msg["content"] = "";
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check message %zu: content OK, type=%s\n",
idx, msg["content"].is_string() ? "string" :
msg["content"].is_array() ? "array" :
msg["content"].is_object() ? "object" : "other");
}
}
// Debug: Count tool messages
int tool_msg_count = 0;
for (const auto& msg : messages_json) {
if (msg.contains("role") && msg["role"] == "tool") {
tool_msg_count++;
}
}
SRV_DBG("[TOOLS DEBUG] PredictStream: Built %d tool messages out of %zu total messages\n", tool_msg_count, messages_json.size());
// Debug: Print full conversation (messages)
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full messages array:\n%s\n", messages_json.dump(2).c_str());
body_json["messages"] = messages_json;
body_json["stream"] = true; // PredictStream is always streaming
@@ -945,16 +769,6 @@ public:
data["grammar"].is_string() &&
!data["grammar"].get<std::string>().empty();
SRV_INF("[TOOLS DEBUG] PredictStream: has_grammar_from_go=%d, data.contains(\"tools\")=%d, data.contains(\"grammar\")=%d\n",
has_grammar_from_go ? 1 : 0,
data.contains("tools") ? 1 : 0,
data.contains("grammar") ? 1 : 0);
if (data.contains("grammar")) {
SRV_INF("[TOOLS DEBUG] PredictStream: grammar type=%s, empty=%d\n",
data["grammar"].is_string() ? "string" : "other",
data["grammar"].is_string() && data["grammar"].get<std::string>().empty() ? 1 : 0);
}
// Copy other relevant fields from data that oaicompat_chat_params_parse expects
// Tools and tool_choice are only passed when NoGrammar is true (grammar not provided)
// When grammar is provided from Go layer, we use it instead of template-generated grammar
@@ -964,36 +778,8 @@ public:
body_json["tools"] = data["tools"];
std::string tools_str = data["tools"].dump();
SRV_INF("Using tools from data (NoGrammar=true): %s\n", tools_str.c_str());
// Debug: Log tools count and details before template processing
if (data["tools"].is_array()) {
SRV_INF("[TOOLS DEBUG] PredictStream: Passing %zu tools to oaicompat_chat_params_parse\n", data["tools"].size());
for (size_t t_idx = 0; t_idx < data["tools"].size(); t_idx++) {
const auto& tool = data["tools"][t_idx];
std::string tool_name = "unknown";
std::string tool_desc = "";
if (tool.contains("function")) {
const auto& func = tool["function"];
if (func.contains("name")) {
tool_name = func["name"].get<std::string>();
}
if (func.contains("description")) {
tool_desc = func["description"].is_string() ?
func["description"].get<std::string>() : "";
}
} else if (tool.contains("name")) {
tool_name = tool["name"].get<std::string>();
if (tool.contains("description")) {
tool_desc = tool["description"].is_string() ?
tool["description"].get<std::string>() : "";
}
}
SRV_INF("[TOOLS DEBUG] PredictStream: Tool %zu: name=%s, description=%s\n",
t_idx, tool_name.c_str(), tool_desc.substr(0, 100).c_str());
}
}
} else {
SRV_WRN("%s", "No tools found in data - tool calls will not work without tools field\n");
SRV_DBG("[TOOLS DEBUG] PredictStream: No tools in data, tool_choice=%s\n", data.contains("tool_choice") ? data["tool_choice"].dump().c_str() : "not set");
}
if (data.contains("tool_choice")) {
// tool_choice can be a string or object, but oaicompat_chat_params_parse expects a string
@@ -1035,17 +821,6 @@ public:
if (data.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = data["chat_template_kwargs"];
}
// Pass parallel_tool_calls if present (used by oaicompat_chat_params_parse)
if (data.contains("parallel_tool_calls")) {
body_json["parallel_tool_calls"] = data["parallel_tool_calls"];
}
// Pass add_generation_prompt if present (used by oaicompat_chat_params_parse)
if (data.contains("add_generation_prompt")) {
body_json["add_generation_prompt"] = data["add_generation_prompt"];
}
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
// Use the same approach as server.cpp: call oaicompat_chat_params_parse
// This handles all template application, grammar merging, etc. automatically
@@ -1056,56 +831,8 @@ public:
// Update allow_image and allow_audio based on current mctx state
parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
// Debug: Log tools before template processing
if (body_json.contains("tools")) {
SRV_DBG("[TOOLS DEBUG] PredictStream: Before oaicompat_chat_params_parse - tools count: %zu\n",
body_json["tools"].is_array() ? body_json["tools"].size() : 0);
}
// Debug: Verify messages content before template processing
// Also ensure ALL messages have content set to string (not null) - templates expect strings
if (body_json.contains("messages") && body_json["messages"].is_array()) {
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
auto& msg = body_json["messages"][idx];
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
if (msg.contains("content")) {
if (msg["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
msg["content"] = ""; // Fix null content
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
// If content is object or other non-string type, convert to string for templates
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
if (msg["content"].is_object()) {
msg["content"] = msg["content"].dump();
} else {
msg["content"] = "";
}
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
idx, role_str.c_str(),
msg["content"].is_string() ? "string" :
msg["content"].is_array() ? "array" :
msg["content"].is_object() ? "object" : "other");
}
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
msg["content"] = ""; // Add missing content
}
}
}
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
// Debug: Log tools after template processing
if (parsed_data.contains("tools")) {
SRV_DBG("[TOOLS DEBUG] PredictStream: After oaicompat_chat_params_parse - tools count: %zu\n",
parsed_data["tools"].is_array() ? parsed_data["tools"].size() : 0);
} else {
SRV_DBG("%s", "[TOOLS DEBUG] PredictStream: After oaicompat_chat_params_parse - no tools in parsed_data\n");
}
// Extract the prompt from parsed data
prompt_str = parsed_data.at("prompt").get<std::string>();
@@ -1116,9 +843,8 @@ public:
preserved_grammar = data["grammar"];
}
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, parse_tool_calls, etc.)
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, etc.)
// This ensures all template-generated fields are included
// parse_tool_calls is set by oaicompat_chat_params_parse when tools are present
for (const auto& item : parsed_data.items()) {
if (item.key() != "prompt") { // Don't overwrite prompt_str, we already extracted it
// If grammar was provided from Go layer, preserve it instead of template-generated grammar
@@ -1129,11 +855,6 @@ public:
}
}
}
// Debug: Log parse_tool_calls if present (set by oaicompat_chat_params_parse when tools are present)
if (data.contains("parse_tool_calls")) {
SRV_DBG("[TOOLS DEBUG] PredictStream: parse_tool_calls=%s\n", data["parse_tool_calls"].get<bool>() ? "true" : "false");
}
} else {
// Use prompt directly from data
if (data.contains("prompt") && data["prompt"].is_string()) {
@@ -1388,19 +1109,11 @@ public:
}
}
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
for (int i = 0; i < request->messages_size(); i++) {
const auto& msg = request->messages(i);
json msg_json;
msg_json["role"] = msg.role();
SRV_INF("[CONTENT DEBUG] Predict: Message %d: role=%s, content_empty=%d, content_length=%zu\n",
i, msg.role().c_str(), msg.content().empty() ? 1 : 0, msg.content().size());
if (!msg.content().empty()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d content (first 200 chars): %s\n",
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
}
bool is_last_user_msg = (i == last_user_msg_idx);
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
@@ -1411,11 +1124,6 @@ public:
json content_val;
try {
content_val = json::parse(msg.content());
// Handle null values - convert to empty string to avoid template errors
if (content_val.is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d parsed JSON is null, converting to empty string\n", i);
content_val = "";
}
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
content_val = msg.content();
@@ -1423,7 +1131,6 @@ public:
// If content is an object (e.g., from tool call failures), convert to string
if (content_val.is_object()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d content is object, converting to string\n", i);
content_val = content_val.dump();
}
@@ -1458,17 +1165,7 @@ public:
msg_json["content"] = content_array;
} else {
// Use content as-is (already array or not last user message)
// Ensure null values are converted to empty string
if (content_val.is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d content_val was null, setting to empty string\n", i);
msg_json["content"] = "";
} else {
msg_json["content"] = content_val;
SRV_INF("[CONTENT DEBUG] Predict: Message %d content set, type=%s\n",
i, content_val.is_string() ? "string" :
content_val.is_array() ? "array" :
content_val.is_object() ? "object" : "other");
}
msg_json["content"] = content_val;
}
} else if (is_last_user_msg && has_images_or_audio) {
// If no content but this is the last user message with images/audio, create content array
@@ -1495,65 +1192,9 @@ public:
}
}
msg_json["content"] = content_array;
SRV_INF("[CONTENT DEBUG] Predict: Message %d created content array with media\n", i);
} else if (!msg.tool_calls().empty()) {
// Tool call messages may have null content, but templates expect string
// IMPORTANT: Set to space " " instead of empty string "", because llama.cpp's
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
// which causes template errors when accessing message.content[:tool_start_length]
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls, setting content to space (not empty string)\n", i);
msg_json["content"] = " ";
} else if (msg.role() == "tool") {
// Tool role messages must have content field set, even if empty
// Jinja templates expect content to be a string, not null or object
SRV_INF("[CONTENT DEBUG] Predict: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
if (msg.content().empty()) {
msg_json["content"] = "";
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): empty content, set to empty string\n", i);
} else {
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): content exists: %s\n",
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
// Content exists, parse and ensure it's a string
json content_val;
try {
content_val = json::parse(msg.content());
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): parsed JSON, type=%s\n",
i, content_val.is_null() ? "null" :
content_val.is_object() ? "object" :
content_val.is_string() ? "string" :
content_val.is_array() ? "array" : "other");
// Handle null values - Jinja templates expect content to be a string, not null
if (content_val.is_null()) {
msg_json["content"] = "";
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): null content, converted to empty string\n", i);
} else if (content_val.is_object()) {
// If content is an object (e.g., from tool call failures/errors), convert to string
msg_json["content"] = content_val.dump();
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): object content, converted to string: %s\n",
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
} else if (content_val.is_string()) {
msg_json["content"] = content_val.get<std::string>();
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): string content, using as-is\n", i);
} else {
// For arrays or other types, convert to string
msg_json["content"] = content_val.dump();
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): %s content, converted to string\n",
i, content_val.is_array() ? "array" : "other type");
}
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
msg_json["content"] = msg.content();
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): not JSON, using as string\n", i);
}
}
} else {
// Ensure all messages have content set (fallback for any unhandled cases)
// Jinja templates expect content to be present, default to empty string if not set
if (!msg_json.contains("content")) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d (role=%s): no content field, adding empty string\n",
i, msg.role().c_str());
msg_json["content"] = "";
}
// Tool call messages may have null content
msg_json["content"] = json();
}
// Add optional fields for OpenAI-compatible message format
@@ -1571,98 +1212,14 @@ public:
try {
json tool_calls = json::parse(msg.tool_calls());
msg_json["tool_calls"] = tool_calls;
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
// IMPORTANT: If message has tool_calls but content is empty or not set,
// set content to space " " instead of empty string "", because llama.cpp's
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
// which causes template errors when accessing message.content[:tool_start_length]
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls but empty content, setting to space\n", i);
msg_json["content"] = " ";
}
// Log each tool call with name and arguments
if (tool_calls.is_array()) {
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
const auto& tc = tool_calls[tc_idx];
std::string tool_name = "unknown";
std::string tool_args = "{}";
if (tc.contains("function")) {
const auto& func = tc["function"];
if (func.contains("name")) {
tool_name = func["name"].get<std::string>();
}
if (func.contains("arguments")) {
tool_args = func["arguments"].is_string() ?
func["arguments"].get<std::string>() :
func["arguments"].dump();
}
} else if (tc.contains("name")) {
tool_name = tc["name"].get<std::string>();
if (tc.contains("arguments")) {
tool_args = tc["arguments"].is_string() ?
tc["arguments"].get<std::string>() :
tc["arguments"].dump();
}
}
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d, tool_call %zu: name=%s, arguments=%s\n",
i, tc_idx, tool_name.c_str(), tool_args.c_str());
}
}
} catch (const json::parse_error& e) {
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
}
}
// Debug: Log final content state before adding to array
if (msg_json.contains("content")) {
if (msg_json["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
} else {
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content type=%s, has_value=%d\n",
i, msg_json["content"].is_string() ? "string" :
msg_json["content"].is_array() ? "array" :
msg_json["content"].is_object() ? "object" : "other",
msg_json["content"].is_null() ? 0 : 1);
}
} else {
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
}
messages_json.push_back(msg_json);
}
// Final safety check: Ensure no message has null content (Jinja templates require strings)
SRV_INF("[CONTENT DEBUG] Predict: Running final safety check on %zu messages\n", messages_json.size());
for (size_t idx = 0; idx < messages_json.size(); idx++) {
auto& msg = messages_json[idx];
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
if (msg.contains("content") && msg["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: Safety check found message %zu (role=%s) with NULL content, converting to empty string\n", idx, role_str.c_str());
msg["content"] = "";
} else if (!msg.contains("content")) {
SRV_INF("[CONTENT DEBUG] Predict: Safety check found message %zu (role=%s) without content field, adding empty string\n", idx, role_str.c_str());
msg["content"] = "";
} else {
SRV_INF("[CONTENT DEBUG] Predict: Safety check message %zu (role=%s): content OK, type=%s\n",
idx, role_str.c_str(),
msg["content"].is_string() ? "string" :
msg["content"].is_array() ? "array" :
msg["content"].is_object() ? "object" : "other");
}
}
// Debug: Count tool messages
int tool_msg_count = 0;
for (const auto& msg : messages_json) {
if (msg.contains("role") && msg["role"] == "tool") {
tool_msg_count++;
}
}
SRV_DBG("[TOOLS DEBUG] Predict: Built %d tool messages out of %zu total messages\n", tool_msg_count, messages_json.size());
// Debug: Print full conversation (messages)
SRV_DBG("[CONVERSATION DEBUG] Predict: Full messages array:\n%s\n", messages_json.dump(2).c_str());
body_json["messages"] = messages_json;
body_json["stream"] = false;
@@ -1673,16 +1230,6 @@ public:
data["grammar"].is_string() &&
!data["grammar"].get<std::string>().empty();
SRV_INF("[TOOLS DEBUG] Predict: has_grammar_from_go=%d, data.contains(\"tools\")=%d, data.contains(\"grammar\")=%d\n",
has_grammar_from_go ? 1 : 0,
data.contains("tools") ? 1 : 0,
data.contains("grammar") ? 1 : 0);
if (data.contains("grammar")) {
SRV_INF("[TOOLS DEBUG] Predict: grammar type=%s, empty=%d\n",
data["grammar"].is_string() ? "string" : "other",
data["grammar"].is_string() && data["grammar"].get<std::string>().empty() ? 1 : 0);
}
// Copy other relevant fields from data that oaicompat_chat_params_parse expects
// Tools and tool_choice are only passed when NoGrammar is true (grammar not provided)
// When grammar is provided from Go layer, we use it instead of template-generated grammar
@@ -1692,36 +1239,8 @@ public:
body_json["tools"] = data["tools"];
std::string tools_str = data["tools"].dump();
SRV_INF("Using tools from data (NoGrammar=true): %s\n", tools_str.c_str());
// Debug: Log tools count and details before template processing
if (data["tools"].is_array()) {
SRV_INF("[TOOLS DEBUG] Predict: Passing %zu tools to oaicompat_chat_params_parse\n", data["tools"].size());
for (size_t t_idx = 0; t_idx < data["tools"].size(); t_idx++) {
const auto& tool = data["tools"][t_idx];
std::string tool_name = "unknown";
std::string tool_desc = "";
if (tool.contains("function")) {
const auto& func = tool["function"];
if (func.contains("name")) {
tool_name = func["name"].get<std::string>();
}
if (func.contains("description")) {
tool_desc = func["description"].is_string() ?
func["description"].get<std::string>() : "";
}
} else if (tool.contains("name")) {
tool_name = tool["name"].get<std::string>();
if (tool.contains("description")) {
tool_desc = tool["description"].is_string() ?
tool["description"].get<std::string>() : "";
}
}
SRV_INF("[TOOLS DEBUG] Predict: Tool %zu: name=%s, description=%s\n",
t_idx, tool_name.c_str(), tool_desc.substr(0, 100).c_str());
}
}
} else {
SRV_WRN("%s", "No tools found in data - tool calls will not work without tools field\n");
SRV_DBG("[TOOLS DEBUG] Predict: No tools in data, tool_choice=%s\n", data.contains("tool_choice") ? data["tool_choice"].dump().c_str() : "not set");
}
if (data.contains("tool_choice")) {
// tool_choice can be a string or object, but oaicompat_chat_params_parse expects a string
@@ -1763,17 +1282,6 @@ public:
if (data.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = data["chat_template_kwargs"];
}
// Pass parallel_tool_calls if present (used by oaicompat_chat_params_parse)
if (data.contains("parallel_tool_calls")) {
body_json["parallel_tool_calls"] = data["parallel_tool_calls"];
}
// Pass add_generation_prompt if present (used by oaicompat_chat_params_parse)
if (data.contains("add_generation_prompt")) {
body_json["add_generation_prompt"] = data["add_generation_prompt"];
}
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
SRV_DBG("[CONVERSATION DEBUG] Predict: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
// Use the same approach as server.cpp: call oaicompat_chat_params_parse
// This handles all template application, grammar merging, etc. automatically
@@ -1784,56 +1292,8 @@ public:
// Update allow_image and allow_audio based on current mctx state
parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
// Debug: Log tools before template processing
if (body_json.contains("tools")) {
SRV_DBG("[TOOLS DEBUG] Predict: Before oaicompat_chat_params_parse - tools count: %zu\n",
body_json["tools"].is_array() ? body_json["tools"].size() : 0);
}
// Debug: Verify messages content before template processing
// Also ensure ALL messages have content set to string (not null) - templates expect strings
if (body_json.contains("messages") && body_json["messages"].is_array()) {
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
auto& msg = body_json["messages"][idx];
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
if (msg.contains("content")) {
if (msg["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
msg["content"] = ""; // Fix null content
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
// If content is object or other non-string type, convert to string for templates
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
if (msg["content"].is_object()) {
msg["content"] = msg["content"].dump();
} else {
msg["content"] = "";
}
} else {
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
idx, role_str.c_str(),
msg["content"].is_string() ? "string" :
msg["content"].is_array() ? "array" :
msg["content"].is_object() ? "object" : "other");
}
} else {
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
msg["content"] = ""; // Add missing content
}
}
}
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
// Debug: Log tools after template processing
if (parsed_data.contains("tools")) {
SRV_DBG("[TOOLS DEBUG] Predict: After oaicompat_chat_params_parse - tools count: %zu\n",
parsed_data["tools"].is_array() ? parsed_data["tools"].size() : 0);
} else {
SRV_DBG("%s", "[TOOLS DEBUG] Predict: After oaicompat_chat_params_parse - no tools in parsed_data\n");
}
// Extract the prompt from parsed data
prompt_str = parsed_data.at("prompt").get<std::string>();
@@ -1844,9 +1304,8 @@ public:
preserved_grammar = data["grammar"];
}
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, parse_tool_calls, etc.)
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, etc.)
// This ensures all template-generated fields are included
// parse_tool_calls is set by oaicompat_chat_params_parse when tools are present
for (const auto& item : parsed_data.items()) {
if (item.key() != "prompt") { // Don't overwrite prompt_str, we already extracted it
// If grammar was provided from Go layer, preserve it instead of template-generated grammar
@@ -1857,11 +1316,6 @@ public:
}
}
}
// Debug: Log parse_tool_calls if present (set by oaicompat_chat_params_parse when tools are present)
if (data.contains("parse_tool_calls")) {
SRV_DBG("[TOOLS DEBUG] Predict: parse_tool_calls=%s\n", data["parse_tool_calls"].get<bool>() ? "true" : "false");
}
} else {
// Use prompt directly from data
if (data.contains("prompt") && data["prompt"].is_string()) {
@@ -2138,7 +1592,7 @@ public:
tasks.reserve(documents.size());
for (size_t i = 0; i < documents.size(); i++) {
auto tmp = format_prompt_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
server_task task = server_task(SERVER_TASK_TYPE_RERANK);
task.id = ctx_server.queue_tasks.get_new_id();
task.index = i;
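
The grpc-server.cpp hunks above differ mainly in how much debug logging and null-content normalization surrounds parse_options and the Predict/PredictStream message building, plus the format_prompt_rerank/format_rerank rename. In both variants the server reads tools from the proto as a JSON string (json::parse(predict->tools())) and, for OpenAI-style tools, looks up tools_json[i]["function"]["name"]. As a standalone, hedged sketch of that payload shape only — the struct names and the get_weather tool are illustrative, not LocalAI's actual Go types:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative shapes only: the real proto field and Go plumbing are not part
// of this diff. The C++ parse_options above expects a JSON array where each
// entry carries a "function" object with at least a "name".
type toolFunction struct {
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	Parameters  json.RawMessage `json:"parameters,omitempty"`
}

type tool struct {
	Type     string       `json:"type"`
	Function toolFunction `json:"function"`
}

func main() {
	tools := []tool{
		{
			Type: "function",
			Function: toolFunction{
				Name:        "get_weather", // hypothetical tool name
				Description: "Return the current weather for a city",
				Parameters:  json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}}}`),
			},
		},
	}

	// This string is the kind of value the C++ side parses with
	// json::parse(predict->tools()) before adding it to data["tools"].
	payload, err := json.Marshal(tools)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(payload))
}
```
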

View File

@@ -9,13 +9,10 @@ done
set -e
for file in $(ls llama.cpp/tools/server/); do
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
done
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
set +e

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=19ceec8eac980403b714d603e5ca31653cd42a3f
WHISPER_CPP_VERSION?=d9b7613b34a343848af572cc14467fc5e82fc788
SO_TARGET?=libgowhisper.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -6,4 +6,4 @@ transformers
bitsandbytes
outetts
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -7,4 +7,4 @@ transformers
bitsandbytes
outetts
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -6,4 +6,4 @@ transformers
bitsandbytes
outetts
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -8,4 +8,4 @@ bitsandbytes
outetts
bitsandbytes
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -10,4 +10,4 @@ intel-extension-for-transformers
bitsandbytes
outetts
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -1,5 +1,5 @@
grpcio==1.76.0
protobuf==6.33.1
protobuf==6.32.0
certifi
setuptools
scipy==1.15.1

View File

@@ -3,13 +3,6 @@ set -e
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
# Avoid to overcommit the CPU during build
# https://github.com/vllm-project/vllm/issues/20079
# https://docs.vllm.ai/en/v0.8.3/serving/env_vars.html
# https://docs.redhat.com/it/documentation/red_hat_ai_inference_server/3.0/html/vllm_server_arguments/environment_variables-server-arguments
export NVCC_THREADS=2
export MAX_JOBS=1
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then

View File

@@ -1 +1 @@
https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
flash-attn

View File

@@ -1,9 +1,6 @@
package application
import (
"context"
"sync"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/templates"
@@ -14,14 +11,8 @@ type Application struct {
backendLoader *config.ModelConfigLoader
modelLoader *model.ModelLoader
applicationConfig *config.ApplicationConfig
startupConfig *config.ApplicationConfig // Stores original config from env vars (before file loading)
templatesEvaluator *templates.Evaluator
galleryService *services.GalleryService
watchdogMutex sync.Mutex
watchdogStop chan bool
p2pMutex sync.Mutex
p2pCtx context.Context
p2pCancel context.CancelFunc
}
func newApplication(appConfig *config.ApplicationConfig) *Application {
@@ -53,11 +44,6 @@ func (a *Application) GalleryService() *services.GalleryService {
return a.galleryService
}
// StartupConfig returns the original startup configuration (from env vars, before file loading)
func (a *Application) StartupConfig() *config.ApplicationConfig {
return a.startupConfig
}
func (a *Application) start() error {
galleryService := services.NewGalleryService(a.ApplicationConfig(), a.ModelLoader())
err := galleryService.Start(a.ApplicationConfig().Context, a.ModelConfigLoader(), a.ApplicationConfig().SystemState)

View File

@@ -1,343 +1,180 @@
package application
import (
"encoding/json"
"fmt"
"os"
"path"
"path/filepath"
"time"
"dario.cat/mergo"
"github.com/fsnotify/fsnotify"
"github.com/mudler/LocalAI/core/config"
"github.com/rs/zerolog/log"
)
type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
type configFileHandler struct {
handlers map[string]fileHandler
watcher *fsnotify.Watcher
appConfig *config.ApplicationConfig
}
// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
// then we can export it to other packages
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
c := configFileHandler{
handlers: make(map[string]fileHandler),
appConfig: appConfig,
}
err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler")
}
err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler")
}
err = c.Register("runtime_settings.json", readRuntimeSettingsJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "runtime_settings.json").Msg("unable to register config file handler")
}
return c
}
func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
_, ok := c.handlers[filename]
if ok {
return fmt.Errorf("handler already registered for file %s", filename)
}
c.handlers[filename] = handler
if runNow {
c.callHandler(filename, handler)
}
return nil
}
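
Register above stores one handler per filename and, when runNow is true, immediately feeds it the current file content through callHandler below. A compact, standalone sketch of that register-then-run pattern with stand-in types — the my_settings.json file and the appConfig stub are hypothetical, not LocalAI's config.ApplicationConfig:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// Stand-ins for the types in the diff; the handler here takes only the raw
// file content plus a small config stub.
type appConfig struct{ APIKeys []string }
type fileHandler func(content []byte, cfg *appConfig) error

type registry struct {
	dir      string
	cfg      *appConfig
	handlers map[string]fileHandler
}

// register mirrors Register above: refuse duplicates, optionally run now.
func (r *registry) register(name string, h fileHandler, runNow bool) error {
	if _, ok := r.handlers[name]; ok {
		return fmt.Errorf("handler already registered for file %s", name)
	}
	r.handlers[name] = h
	if runNow {
		content, err := os.ReadFile(filepath.Join(r.dir, filepath.Clean(name)))
		if err != nil && !os.IsNotExist(err) {
			return err
		}
		return h(content, r.cfg)
	}
	return nil
}

func main() {
	r := &registry{dir: ".", cfg: &appConfig{}, handlers: map[string]fileHandler{}}
	// Hypothetical extra dynamic-config file, alongside api_keys.json etc.
	if err := r.register("my_settings.json", func(content []byte, cfg *appConfig) error {
		fmt.Printf("my_settings.json: %d bytes\n", len(content))
		return nil
	}, true); err != nil {
		fmt.Println("register failed:", err)
	}
}
```
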
func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
fileContent, err := os.ReadFile(rootedFilePath)
if err != nil && !os.IsNotExist(err) {
log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
}
if err = handler(fileContent, c.appConfig); err != nil {
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
}
}
func (c *configFileHandler) Watch() error {
configWatcher, err := fsnotify.NewWatcher()
c.watcher = configWatcher
if err != nil {
return err
}
if c.appConfig.DynamicConfigsDirPollInterval > 0 {
log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
go func() {
for {
<-ticker.C
for file, handler := range c.handlers {
log.Debug().Str("file", file).Msg("polling config file")
c.callHandler(file, handler)
}
}
}()
}
// Start listening for events.
go func() {
for {
select {
case event, ok := <-c.watcher.Events:
if !ok {
return
}
if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
handler, ok := c.handlers[path.Base(event.Name)]
if !ok {
continue
}
c.callHandler(filepath.Base(event.Name), handler)
}
case err, ok := <-c.watcher.Errors:
log.Error().Err(err).Msg("config watcher error received")
if !ok {
return
}
}
}
}()
// Add a path.
err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
if err != nil {
return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err)
}
return nil
}
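
Watch above is a standard fsnotify loop: create a watcher, add DynamicConfigsDir, dispatch Write/Create/Remove events to the handler registered under the file's basename, and optionally fall back to polling on a ticker. A minimal standalone sketch of the same pattern, detached from LocalAI's types — the directory and the single handler below are placeholders, and the handler signature is simplified to take only the file content:

```go
package main

import (
	"log"
	"os"
	"path/filepath"

	"github.com/fsnotify/fsnotify"
)

func main() {
	dir := "./dynamic-config" // placeholder for DynamicConfigsDir

	// Handlers keyed by file basename, mirroring configFileHandler.handlers.
	handlers := map[string]func(content []byte){
		"api_keys.json": func(content []byte) {
			log.Printf("api_keys.json changed, %d bytes", len(content))
		},
	}

	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Fatal(err)
	}
	defer watcher.Close()

	go func() {
		for {
			select {
			case event, ok := <-watcher.Events:
				if !ok {
					return
				}
				if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
					handler, ok := handlers[filepath.Base(event.Name)]
					if !ok {
						continue
					}
					// Missing files are tolerated, as in callHandler above.
					content, err := os.ReadFile(event.Name)
					if err != nil && !os.IsNotExist(err) {
						log.Printf("read error: %v", err)
						continue
					}
					handler(content)
				}
			case err, ok := <-watcher.Errors:
				if !ok {
					return
				}
				log.Printf("watcher error: %v", err)
			}
		}
	}()

	if err := watcher.Add(dir); err != nil {
		log.Fatal(err)
	}
	select {} // block forever; a real service would tie this to its lifecycle
}
```
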
// TODO: When we institute graceful shutdown, this should be called
func (c *configFileHandler) Stop() error {
return c.watcher.Close()
}
func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing api keys runtime update")
log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return err
}
log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile")
appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
} else {
log.Trace().Msg("no API keys discovered from dynamic config file")
appConfig.ApiKeys = startupAppConfig.ApiKeys
}
log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
return nil
}
return handler
}
func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing external_backends.json")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileBackends map[string]string
err := json.Unmarshal(fileContent, &fileBackends)
if err != nil {
return err
}
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
if err != nil {
return err
}
} else {
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
}
log.Debug().Msg("external backends loaded from external_backends.json")
return nil
}
return handler
}
type runtimeSettings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"`
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
Threads *int `json:"threads,omitempty"`
ContextSize *int `json:"context_size,omitempty"`
F16 *bool `json:"f16,omitempty"`
Debug *bool `json:"debug,omitempty"`
CORS *bool `json:"cors,omitempty"`
CSRF *bool `json:"csrf,omitempty"`
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
P2PToken *string `json:"p2p_token,omitempty"`
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
Federated *bool `json:"federated,omitempty"`
Galleries *[]config.Gallery `json:"galleries,omitempty"`
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
ApiKeys *[]string `json:"api_keys,omitempty"`
}
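
The runtimeSettings struct above is effectively the schema of runtime_settings.json: every field is a pointer with omitempty, so keys absent from the file leave the corresponding setting alone. A hedged example of a file that overrides only a few settings, produced here by marshalling a partial mirror of that struct — the field subset and values are illustrative, not defaults:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Partial mirror of the runtimeSettings struct above; pointer fields plus
// omitempty mean only the keys you actually set end up in the file.
type runtimeSettingsExample struct {
	WatchdogEnabled     *bool   `json:"watchdog_enabled,omitempty"`
	WatchdogIdleEnabled *bool   `json:"watchdog_idle_enabled,omitempty"`
	WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
	Threads             *int    `json:"threads,omitempty"`
	ContextSize         *int    `json:"context_size,omitempty"`
	Debug               *bool   `json:"debug,omitempty"`
}

func ptr[T any](v T) *T { return &v }

func main() {
	s := runtimeSettingsExample{
		WatchdogEnabled:     ptr(true),
		WatchdogIdleEnabled: ptr(true),
		WatchdogIdleTimeout: ptr("15m"), // parsed with time.ParseDuration in the handler above
		Threads:             ptr(8),
		ContextSize:         ptr(4096),
		Debug:               ptr(false),
	}
	out, err := json.MarshalIndent(s, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // example runtime_settings.json content
}
```
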
func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing runtime_settings.json")
// Determine if settings came from env vars by comparing with startup config
// startupAppConfig contains the original values set from env vars at startup.
// If current values match startup values, they came from env vars (or defaults).
// We apply file settings only if current values match startup values (meaning not from env vars).
envWatchdogIdle := appConfig.WatchDogIdle == startupAppConfig.WatchDogIdle
envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy
envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout
envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests
envThreads := appConfig.Threads == startupAppConfig.Threads
envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize
envF16 := appConfig.F16 == startupAppConfig.F16
envDebug := appConfig.Debug == startupAppConfig.Debug
envCORS := appConfig.CORS == startupAppConfig.CORS
envCSRF := appConfig.CSRF == startupAppConfig.CSRF
envCORSAllowOrigins := appConfig.CORSAllowOrigins == startupAppConfig.CORSAllowOrigins
envP2PToken := appConfig.P2PToken == startupAppConfig.P2PToken
envP2PNetworkID := appConfig.P2PNetworkID == startupAppConfig.P2PNetworkID
envFederated := appConfig.Federated == startupAppConfig.Federated
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
if len(fileContent) > 0 {
var settings runtimeSettings
err := json.Unmarshal(fileContent, &settings)
if err != nil {
return err
}
// Apply each file setting only when the current value no longer matches the startup value
if settings.WatchdogIdleEnabled != nil && !envWatchdogIdle {
appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
if appConfig.WatchDogIdle {
appConfig.WatchDog = true
}
}
if settings.WatchdogBusyEnabled != nil && !envWatchdogBusy {
appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
if appConfig.WatchDogBusy {
appConfig.WatchDog = true
}
}
if settings.WatchdogIdleTimeout != nil && !envWatchdogIdleTimeout {
dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
if err == nil {
appConfig.WatchDogIdleTimeout = dur
} else {
log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json")
}
}
if settings.WatchdogBusyTimeout != nil && !envWatchdogBusyTimeout {
dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
if err == nil {
appConfig.WatchDogBusyTimeout = dur
} else {
log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
}
}
if settings.SingleBackend != nil && !envSingleBackend {
appConfig.SingleBackend = *settings.SingleBackend
}
if settings.ParallelBackendRequests != nil && !envParallelRequests {
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
}
if settings.Threads != nil && !envThreads {
appConfig.Threads = *settings.Threads
}
if settings.ContextSize != nil && !envContextSize {
appConfig.ContextSize = *settings.ContextSize
}
if settings.F16 != nil && !envF16 {
appConfig.F16 = *settings.F16
}
if settings.Debug != nil && !envDebug {
appConfig.Debug = *settings.Debug
}
if settings.CORS != nil && !envCORS {
appConfig.CORS = *settings.CORS
}
if settings.CSRF != nil && !envCSRF {
appConfig.CSRF = *settings.CSRF
}
if settings.CORSAllowOrigins != nil && !envCORSAllowOrigins {
appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
}
if settings.P2PToken != nil && !envP2PToken {
appConfig.P2PToken = *settings.P2PToken
}
if settings.P2PNetworkID != nil && !envP2PNetworkID {
appConfig.P2PNetworkID = *settings.P2PNetworkID
}
if settings.Federated != nil && !envFederated {
appConfig.Federated = *settings.Federated
}
if settings.Galleries != nil {
appConfig.Galleries = *settings.Galleries
}
if settings.BackendGalleries != nil {
appConfig.BackendGalleries = *settings.BackendGalleries
}
if settings.AutoloadGalleries != nil && !envAutoloadGalleries {
appConfig.AutoloadGalleries = *settings.AutoloadGalleries
}
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
}
if settings.ApiKeys != nil {
// Keys provided at startup (env vars) are always kept. If runtime_settings.json
// specifies api_keys (even an empty list), it replaces all runtime-managed keys,
// so the result is startup keys plus whatever the file currently lists.
envKeys := startupAppConfig.ApiKeys
runtimeKeys := *settings.ApiKeys
// Replace all runtime keys with what's in runtime_settings.json
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
}
// If watchdog is enabled via file but not via env, ensure WatchDog flag is set
if !envWatchdogIdle && !envWatchdogBusy {
if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
appConfig.WatchDog = true
}
}
}
log.Debug().Msg("runtime settings loaded from runtime_settings.json")
return nil
}
return handler
}
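For reference, a minimal sketch of the kind of payload the handler above accepts. The field names come from the runtimeSettings JSON tags; the concrete values are illustrative assumptions, not LocalAI defaults.
// Illustrative only: example runtime_settings.json contents and how they map onto
// the runtimeSettings struct above. Values are made up for the example.
var exampleRuntimeSettings = []byte(`{
  "watchdog_enabled": true,
  "watchdog_idle_enabled": true,
  "watchdog_idle_timeout": "15m",
  "parallel_backend_requests": true,
  "api_keys": ["example-runtime-key"]
}`)

func exampleParseRuntimeSettings() (runtimeSettings, error) {
	var settings runtimeSettings
	// The handler above parses timeouts such as "15m" with time.ParseDuration afterwards.
	err := json.Unmarshal(exampleRuntimeSettings, &settings)
	return settings, err
}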
package application
import (
"encoding/json"
"fmt"
"os"
"path"
"path/filepath"
"time"
"dario.cat/mergo"
"github.com/fsnotify/fsnotify"
"github.com/mudler/LocalAI/core/config"
"github.com/rs/zerolog/log"
)
type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
type configFileHandler struct {
handlers map[string]fileHandler
watcher *fsnotify.Watcher
appConfig *config.ApplicationConfig
}
// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
// then we can export it to other packages
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
c := configFileHandler{
handlers: make(map[string]fileHandler),
appConfig: appConfig,
}
err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler")
}
err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler")
}
return c
}
func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
_, ok := c.handlers[filename]
if ok {
return fmt.Errorf("handler already registered for file %s", filename)
}
c.handlers[filename] = handler
if runNow {
c.callHandler(filename, handler)
}
return nil
}
func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
fileContent, err := os.ReadFile(rootedFilePath)
if err != nil && !os.IsNotExist(err) {
log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
}
if err = handler(fileContent, c.appConfig); err != nil {
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
}
}
func (c *configFileHandler) Watch() error {
configWatcher, err := fsnotify.NewWatcher()
c.watcher = configWatcher
if err != nil {
return err
}
if c.appConfig.DynamicConfigsDirPollInterval > 0 {
log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
go func() {
for {
<-ticker.C
for file, handler := range c.handlers {
log.Debug().Str("file", file).Msg("polling config file")
c.callHandler(file, handler)
}
}
}()
}
// Start listening for events.
go func() {
for {
select {
case event, ok := <-c.watcher.Events:
if !ok {
return
}
if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
handler, ok := c.handlers[path.Base(event.Name)]
if !ok {
continue
}
c.callHandler(filepath.Base(event.Name), handler)
}
case err, ok := <-c.watcher.Errors:
log.Error().Err(err).Msg("config watcher error received")
if !ok {
return
}
}
}
}()
// Add a path.
err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
if err != nil {
return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err)
}
return nil
}
// TODO: When we institute graceful shutdown, this should be called
func (c *configFileHandler) Stop() error {
return c.watcher.Close()
}
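A minimal in-package sketch of how an additional dynamic config file could be wired into the watcher via Register; the file name my_feature.json and the handler body are hypothetical, not files LocalAI ships.
// Hypothetical example: registering an extra dynamic config file handler.
func registerMyFeatureHandler(c *configFileHandler) {
	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
		if len(fileContent) == 0 {
			log.Debug().Msg("my_feature.json missing or empty, keeping current settings")
			return nil
		}
		var enabled bool
		if err := json.Unmarshal(fileContent, &enabled); err != nil {
			return err
		}
		log.Debug().Bool("enabled", enabled).Msg("my_feature.json loaded")
		return nil
	}
	// runNow=true runs the handler immediately against the current file contents,
	// after which Watch() keeps it updated on fsnotify events (or on each poll tick).
	if err := c.Register("my_feature.json", handler, true); err != nil {
		log.Error().Err(err).Str("file", "my_feature.json").Msg("unable to register config file handler")
	}
}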
func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing api keys runtime update")
log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return err
}
log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile")
appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
} else {
log.Trace().Msg("no API keys discovered from dynamic config file")
appConfig.ApiKeys = startupAppConfig.ApiKeys
}
log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
return nil
}
return handler
}
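To illustrate the merge semantics above: keys supplied at startup (e.g. via LOCALAI_API_KEY) always survive, and api_keys.json only contributes additional keys. The key strings below are placeholders.
// Illustrative only: startup keys are kept, file keys are appended.
func exampleMergeApiKeys() []string {
	startupKeys := []string{"env-key"}                    // e.g. from LOCALAI_API_KEY at startup
	fileContent := []byte(`["file-key-1", "file-key-2"]`) // hypothetical api_keys.json contents

	var fileKeys []string
	if err := json.Unmarshal(fileContent, &fileKeys); err != nil {
		return startupKeys // the real handler returns the error instead
	}
	return append(startupKeys, fileKeys...) // [env-key file-key-1 file-key-2]
}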
func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing external_backends.json")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileBackends map[string]string
err := json.Unmarshal(fileContent, &fileBackends)
if err != nil {
return err
}
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
if err != nil {
return err
}
} else {
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
}
log.Debug().Msg("external backends loaded from external_backends.json")
return nil
}
return handler
}
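A small sketch of the merge behavior above: with mergo's default (non-override) merge, a backend name already registered at startup is expected to keep its startup address, while new names from external_backends.json are added. Backend names and addresses below are made up.
// Illustrative only: expected outcome of mergo.Merge without mergo.WithOverride.
func exampleMergeExternalBackends() (map[string]string, error) {
	backends := map[string]string{"piper": "127.0.0.1:9000"} // supplied at startup (flags/env)
	fromFile := map[string]string{
		"piper":   "127.0.0.1:9999", // same key: expected to be ignored without WithOverride
		"whisper": "127.0.0.1:9001", // new key: added
	}
	err := mergo.Merge(&backends, fromFile)
	return backends, err // map[piper:127.0.0.1:9000 whisper:127.0.0.1:9001]
}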

View File

@@ -1,240 +0,0 @@
package application
import (
"context"
"fmt"
"net"
"slices"
"time"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/edgevpn/pkg/node"
"github.com/rs/zerolog/log"
zlog "github.com/rs/zerolog/log"
)
func (a *Application) StopP2P() error {
if a.p2pCancel != nil {
a.p2pCancel()
a.p2pCancel = nil
a.p2pCtx = nil
// Wait a bit for shutdown to complete
time.Sleep(200 * time.Millisecond)
}
return nil
}
func (a *Application) StartP2P() error {
// we need a p2p token
if a.applicationConfig.P2PToken == "" {
return fmt.Errorf("P2P token is not set")
}
networkID := a.applicationConfig.P2PNetworkID
ctx, cancel := context.WithCancel(a.ApplicationConfig().Context)
a.p2pCtx = ctx
a.p2pCancel = cancel
var n *node.Node
// Here we are avoiding creating multiple nodes:
// - if the federated mode is enabled, we create a federated node and expose a service
// - exposing a service creates a node with specific options, and we don't want to create another node
// If the federated mode is enabled, we expose a service to the local instance
// running at the configured API address
if a.applicationConfig.Federated {
_, port, err := net.SplitHostPort(a.applicationConfig.APIAddress)
if err != nil {
return err
}
// Here a new node is created and started
// and a service is exposed by the node
node, err := p2p.ExposeService(ctx, "localhost", port, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(ctx, node, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
return err
}
n = node
// start node sync in the background
if err := a.p2pSync(ctx, node); err != nil {
return err
}
}
// If a node wasn't created previously, create it
if n == nil {
node, err := p2p.NewNode(a.applicationConfig.P2PToken)
if err != nil {
return err
}
err = node.Start(ctx)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
n = node
}
// Attach a ServiceDiscoverer to the p2p node
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(ctx, n, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
if a.applicationConfig.TunnelCallback != nil {
a.applicationConfig.TunnelCallback(tunnelAddresses)
}
}, true); err != nil {
return err
}
return nil
}
// RestartP2P restarts the P2P stack with the current ApplicationConfig settings:
// it stops any running P2P node, then starts a new one in the background via StartP2P.
func (a *Application) RestartP2P() error {
a.p2pMutex.Lock()
defer a.p2pMutex.Unlock()
// Stop existing P2P if running
if a.p2pCancel != nil {
a.p2pCancel()
a.p2pCancel = nil
a.p2pCtx = nil
// Wait a bit for shutdown to complete
time.Sleep(200 * time.Millisecond)
}
appConfig := a.ApplicationConfig()
// Start P2P if token is set
if appConfig.P2PToken == "" {
return fmt.Errorf("P2P token is not set")
}
// Create new context for P2P
ctx, cancel := context.WithCancel(appConfig.Context)
a.p2pCtx = ctx
a.p2pCancel = cancel
// Get API address from config
address := appConfig.APIAddress
if address == "" {
address = "127.0.0.1:8080" // default
}
// Start P2P stack in a goroutine
go func() {
if err := a.StartP2P(); err != nil {
log.Error().Err(err).Msg("Failed to start P2P stack")
cancel() // Cancel context on error
}
}()
log.Info().Msg("P2P stack restarted with new settings")
return nil
}
func syncState(ctx context.Context, n *node.Node, app *Application) error {
zlog.Debug().Msg("[p2p-sync] Syncing state")
whatWeHave := []string{}
for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() {
whatWeHave = append(whatWeHave, model.Name)
}
ledger, _ := n.Ledger()
currentData := ledger.CurrentData()
zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData)
data, exists := ledger.GetKey("shared_state", "models")
if !exists {
ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave)
zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave)
}
models := []string{}
if err := data.Unmarshal(&models); err != nil {
zlog.Warn().Err(err).Msg("error unmarshalling models")
return nil
}
zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models)
// Sync with our state
whatIsNotThere := []string{}
for _, model := range whatWeHave {
if !slices.Contains(models, model) {
whatIsNotThere = append(whatIsNotThere, model)
}
}
if len(whatIsNotThere) > 0 {
zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...))
ledger.AnnounceUpdate(
ctx,
1*time.Minute,
"shared_state",
"models",
append(models, whatIsNotThere...),
)
}
// Check if we have a model that is not in our state, otherwise install it
for _, model := range models {
if slices.Contains(whatWeHave, model) {
zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model)
continue
}
// we install model
zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model)
uuid, err := uuid.NewUUID()
if err != nil {
zlog.Error().Err(err).Msg("error generating UUID")
continue
}
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uuid.String(),
GalleryElementName: model,
Galleries: app.ApplicationConfig().Galleries,
BackendGalleries: app.ApplicationConfig().BackendGalleries,
}
}
return nil
}
func (a *Application) p2pSync(ctx context.Context, n *node.Node) error {
go func() {
for {
select {
case <-ctx.Done():
return
case <-time.After(1 * time.Minute):
if err := syncState(ctx, n, a); err != nil {
zlog.Error().Err(err).Msg("error syncing state")
}
}
}
}()
return nil
}

View File

@@ -1,11 +1,8 @@
package application
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
@@ -21,12 +18,7 @@ import (
func New(opts ...config.AppOption) (*Application, error) {
options := config.NewApplicationConfig(opts...)
// Store a copy of the startup config (from env vars, before file loading)
// This is used to determine if settings came from env vars vs file
startupConfigCopy := *options
application := newApplication(options)
application.startupConfig = &startupConfigCopy
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
@@ -118,13 +110,6 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
// Load runtime settings from file if DynamicConfigsDir is set
// This applies file settings with env var precedence (env vars take priority)
// Note: startupConfigCopy was already created above, so it has the original env var values
if options.DynamicConfigsDir != "" {
loadRuntimeSettingsFromFile(options)
}
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()
@@ -135,8 +120,21 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}()
// Initialize watchdog with current settings (after loading from file)
initializeWatchdog(application, options)
if options.WatchDog {
wd := model.NewWatchDog(
application.ModelLoader(),
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
application.ModelLoader().SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
wd.Shutdown()
}()
}
if options.LoadToMemory != nil && !options.SingleBackend {
for _, m := range options.LoadToMemory {
@@ -188,131 +186,3 @@ func startWatcher(options *config.ApplicationConfig) {
log.Error().Err(err).Msg("failed creating watcher")
}
}
// loadRuntimeSettingsFromFile loads settings from runtime_settings.json at startup,
// giving precedence to values already applied from env vars via AppOptions.
// By the time this runs, NewApplicationConfig has already applied the AppOptions, so we
// cannot tell directly whether a value came from an env var. As a heuristic, any value
// that is still at its zero default is treated as "not set from env" and may be set from
// the file; non-default values are preserved. Runtime changes after startup are handled
// by the file watcher handler, which compares against startupAppConfig instead.
func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
settingsFile := filepath.Join(options.DynamicConfigsDir, "runtime_settings.json")
fileContent, err := os.ReadFile(settingsFile)
if err != nil {
if os.IsNotExist(err) {
log.Debug().Msg("runtime_settings.json not found, using defaults")
return
}
log.Warn().Err(err).Msg("failed to read runtime_settings.json")
return
}
var settings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"`
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
}
if err := json.Unmarshal(fileContent, &settings); err != nil {
log.Warn().Err(err).Msg("failed to parse runtime_settings.json")
return
}
// At this point, options already holds the values applied from env vars (via AppOptions in run.go).
// Since we cannot tell whether a value was explicitly set from an env var, use the defaults as a
// proxy: bools default to false and durations to 0. A value still at its default is assumed not to
// come from an env var, so the file may set it; a non-default value is preserved.
// Note: env vars that explicitly set false/0 are indistinguishable from the defaults;
// that is an accepted limitation of this heuristic.
if settings.WatchdogIdleEnabled != nil {
// Only apply if current value is default (false), suggesting it wasn't set from env var
if !options.WatchDogIdle {
options.WatchDogIdle = *settings.WatchdogIdleEnabled
if options.WatchDogIdle {
options.WatchDog = true
}
}
}
if settings.WatchdogBusyEnabled != nil {
if !options.WatchDogBusy {
options.WatchDogBusy = *settings.WatchdogBusyEnabled
if options.WatchDogBusy {
options.WatchDog = true
}
}
}
if settings.WatchdogIdleTimeout != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.WatchDogIdleTimeout == 0 {
dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
if err == nil {
options.WatchDogIdleTimeout = dur
} else {
log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json")
}
}
}
if settings.WatchdogBusyTimeout != nil {
if options.WatchDogBusyTimeout == 0 {
dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
if err == nil {
options.WatchDogBusyTimeout = dur
} else {
log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
}
}
}
if settings.SingleBackend != nil {
if !options.SingleBackend {
options.SingleBackend = *settings.SingleBackend
}
}
if settings.ParallelBackendRequests != nil {
if !options.ParallelBackendRequests {
options.ParallelBackendRequests = *settings.ParallelBackendRequests
}
}
if !options.WatchDogIdle && !options.WatchDogBusy {
if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
options.WatchDog = true
}
}
log.Debug().Msg("Runtime settings loaded from runtime_settings.json")
}
// initializeWatchdog initializes the watchdog with current ApplicationConfig settings
func initializeWatchdog(application *Application, options *config.ApplicationConfig) {
if options.WatchDog {
wd := model.NewWatchDog(
application.ModelLoader(),
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
application.ModelLoader().SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
wd.Shutdown()
}()
}
}

View File

@@ -1,88 +0,0 @@
package application
import (
"time"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
func (a *Application) StopWatchdog() error {
if a.watchdogStop != nil {
close(a.watchdogStop)
a.watchdogStop = nil
}
return nil
}
// startWatchdog starts the watchdog with current ApplicationConfig settings
// This is an internal method that assumes the caller holds the watchdogMutex
func (a *Application) startWatchdog() error {
appConfig := a.ApplicationConfig()
// Create new watchdog if enabled
if appConfig.WatchDog {
wd := model.NewWatchDog(
a.modelLoader,
appConfig.WatchDogBusyTimeout,
appConfig.WatchDogIdleTimeout,
appConfig.WatchDogBusy,
appConfig.WatchDogIdle)
a.modelLoader.SetWatchDog(wd)
// Create new stop channel
a.watchdogStop = make(chan bool, 1)
// Start watchdog goroutine
go wd.Run()
// Setup shutdown handler
go func() {
select {
case <-a.watchdogStop:
log.Debug().Msg("Watchdog stop signal received")
wd.Shutdown()
case <-appConfig.Context.Done():
log.Debug().Msg("Context canceled, shutting down watchdog")
wd.Shutdown()
}
}()
log.Info().Msg("Watchdog started with new settings")
} else {
log.Info().Msg("Watchdog disabled")
}
return nil
}
// StartWatchdog starts the watchdog with current ApplicationConfig settings
func (a *Application) StartWatchdog() error {
a.watchdogMutex.Lock()
defer a.watchdogMutex.Unlock()
return a.startWatchdog()
}
// RestartWatchdog restarts the watchdog with current ApplicationConfig settings
func (a *Application) RestartWatchdog() error {
a.watchdogMutex.Lock()
defer a.watchdogMutex.Unlock()
// Shutdown existing watchdog if running
if a.watchdogStop != nil {
close(a.watchdogStop)
a.watchdogStop = nil
}
// Also shut down the watchdog instance currently attached to the model loader
currentWD := a.modelLoader.GetWatchDog()
if currentWD != nil {
currentWD.Shutdown()
// Wait a bit for shutdown to complete
time.Sleep(100 * time.Millisecond)
}
// Start watchdog with new settings
return a.startWatchdog()
}
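A hedged sketch of how a runtime settings update could use the methods above: mutate the shared ApplicationConfig, then call RestartWatchdog so a fresh watchdog picks up the new values. It assumes ApplicationConfig() exposes the live *config.ApplicationConfig; the helper itself is illustrative, not part of LocalAI.
// Illustrative only: apply new watchdog settings at runtime.
func exampleApplyWatchdogSettings(app *Application, idle, busy time.Duration) error {
	cfg := app.ApplicationConfig() // assumed to return the live *config.ApplicationConfig
	cfg.WatchDog = true
	cfg.WatchDogIdle = true
	cfg.WatchDogBusy = true
	cfg.WatchDogIdleTimeout = idle
	cfg.WatchDogBusyTimeout = busy
	// Tears down any running watchdog and starts a fresh one with the settings above.
	return app.RestartWatchdog()
}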

View File

@@ -40,7 +40,3 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
return fn, nil
}
// ImageGenerationFunc is a test-friendly indirection to call image generation logic.
// Tests can override this variable to provide a stub implementation.
var ImageGenerationFunc = ImageGeneration

87
core/cli/api/p2p.go Normal file
View File

@@ -0,0 +1,87 @@
package cli_api
import (
"context"
"fmt"
"net"
"os"
"strings"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/edgevpn/pkg/node"
"github.com/rs/zerolog/log"
)
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool, app *application.Application) error {
var n *node.Node
// Here we are avoiding creating multiple nodes:
// - if the federated mode is enabled, we create a federated node and expose a service
// - exposing a service creates a node with specific options, and we don't want to create another node
// If the federated mode is enabled, we expose a service to the local instance
// running at the provided address
if federated {
_, port, err := net.SplitHostPort(address)
if err != nil {
return err
}
// Here a new node is created and started
// and a service is exposed by the node
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
return err
}
n = node
// start node sync in the background
if err := p2p.Sync(ctx, node, app); err != nil {
return err
}
}
// If the p2p mode is enabled, we start the service discovery
if token != "" {
// If a node wasn't created previously, create it
if n == nil {
node, err := p2p.NewNode(token)
if err != nil {
return err
}
err = node.Start(ctx)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
n = node
}
// Attach a ServiceDiscoverer to the p2p node
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}, true); err != nil {
return err
}
}
return nil
}
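Design note: unlike Application.StartP2P earlier in this diff, this helper writes the discovered tunnel addresses straight into LLAMACPP_GRPC_SERVERS instead of going through a TunnelCallback. A minimal sketch of the call, mirroring the run.go hunk further below:
// Sketch of the call site in core/cli/run.go (see the hunk further below).
if token != "" {
	if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated, app); err != nil {
		return err
	}
}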

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/mudler/LocalAI/core/application"
cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
@@ -51,7 +52,6 @@ type RunCMD struct {
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface" group:"api"`
DisableRuntimeSettings bool `env:"LOCALAI_DISABLE_RUNTIME_SETTINGS,DISABLE_RUNTIME_SETTINGS" default:"false" help:"Disables the runtime settings. When set to true, the server will not load the runtime settings from the runtime_settings.json file" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliency against timing attacks." group:"hardening"`
@@ -98,7 +98,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
}
opts := []config.AppOption{
config.WithContext(context.Background()),
config.WithConfigFile(r.ModelsConfigFile),
config.WithJSONStringPreload(r.PreloadModels),
config.WithYAMLConfigPreload(r.PreloadModelsConfig),
@@ -129,22 +128,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithLoadToMemory(r.LoadToMemory),
config.WithMachineTag(r.MachineTag),
config.WithAPIAddress(r.Address),
config.WithTunnelCallback(func(tunnels []string) {
tunnelEnvVar := strings.Join(tunnels, ",")
// TODO: this is very specific to llama.cpp, we should have a more generic way to set the environment variable
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}),
}
if r.DisableMetricsEndpoint {
opts = append(opts, config.DisableMetricsEndpoint)
}
if r.DisableRuntimeSettings {
opts = append(opts, config.DisableRuntimeSettings)
}
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled")
@@ -163,9 +152,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
opts = append(opts, config.WithP2PToken(token))
}
if r.Federated {
opts = append(opts, config.EnableFederated)
}
backgroundCtx := context.Background()
idleWatchDog := r.EnableWatchdogIdle
busyWatchDog := r.EnableWatchdogBusy
@@ -235,10 +222,8 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
return err
}
if token != "" {
if err := app.StartP2P(); err != nil {
return err
}
if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated, app); err != nil {
return err
}
signals.RegisterGracefulTerminationHandler(func() {

View File

@@ -33,7 +33,6 @@ type ApplicationConfig struct {
ApiKeys []string
P2PToken string
P2PNetworkID string
Federated bool
DisableWebUI bool
EnforcePredownloadScans bool
@@ -66,10 +65,6 @@ type ApplicationConfig struct {
MachineTag string
APIAddress string
TunnelCallback func(tunnels []string)
DisableRuntimeSettings bool
}
type AppOption func(*ApplicationConfig)
@@ -78,6 +73,7 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
opt := &ApplicationConfig{
Context: context.Background(),
UploadLimitMB: 15,
ContextSize: 512,
Debug: true,
}
for _, oo := range o {
@@ -156,10 +152,6 @@ var DisableWebUI = func(o *ApplicationConfig) {
o.DisableWebUI = true
}
var DisableRuntimeSettings = func(o *ApplicationConfig) {
o.DisableRuntimeSettings = true
}
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
return func(o *ApplicationConfig) {
o.WatchDogBusyTimeout = t
@@ -188,10 +180,6 @@ var EnableBackendGalleriesAutoload = func(o *ApplicationConfig) {
o.AutoloadBackendGalleries = true
}
var EnableFederated = func(o *ApplicationConfig) {
o.Federated = true
}
func WithExternalBackend(name string, uri string) AppOption {
return func(o *ApplicationConfig) {
if o.ExternalGRPCBackends == nil {
@@ -285,12 +273,6 @@ func WithContextSize(ctxSize int) AppOption {
}
}
func WithTunnelCallback(callback func(tunnels []string)) AppOption {
return func(o *ApplicationConfig) {
o.TunnelCallback = callback
}
}
func WithF16(f16 bool) AppOption {
return func(o *ApplicationConfig) {
o.F16 = f16

View File

@@ -1,7 +1,6 @@
package config
import (
"fmt"
"os"
"regexp"
"slices"
@@ -476,7 +475,7 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.syncKnownUsecasesFromString()
}
func (c *ModelConfig) Validate() (bool, error) {
func (c *ModelConfig) Validate() bool {
downloadedFileNames := []string{}
for _, f := range c.DownloadFiles {
downloadedFileNames = append(downloadedFileNames, f.Filename)
@@ -490,20 +489,17 @@ func (c *ModelConfig) Validate() (bool, error) {
}
if strings.HasPrefix(n, string(os.PathSeparator)) ||
strings.Contains(n, "..") {
return false, fmt.Errorf("invalid file path: %s", n)
return false
}
}
if c.Backend != "" {
// a regex that checks the backend name contains no special characters, except '-' and '_'
re := regexp.MustCompile(`^[a-zA-Z0-9-_]+$`)
if !re.MatchString(c.Backend) {
return false, fmt.Errorf("invalid backend name: %s", c.Backend)
}
return true, nil
return re.MatchString(c.Backend)
}
return true, nil
return true
}
func (c *ModelConfig) HasTemplate() bool {
@@ -538,8 +534,7 @@ const (
func GetAllModelConfigUsecases() map[string]ModelConfigUsecases {
return map[string]ModelConfigUsecases{
// Note: FLAG_ANY is intentionally excluded from this map
// because it's 0 and would always match in HasUsecases checks
"FLAG_ANY": FLAG_ANY,
"FLAG_CHAT": FLAG_CHAT,
"FLAG_COMPLETION": FLAG_COMPLETION,
"FLAG_EDIT": FLAG_EDIT,
@@ -641,7 +636,7 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecases) bool {
}
}
if (u & FLAG_TTS) == FLAG_TTS {
ttsBackends := []string{"bark-cpp", "piper", "transformers-musicgen", "kokoro"}
ttsBackends := []string{"bark-cpp", "piper", "transformers-musicgen"}
if !slices.Contains(ttsBackends, c.Backend) {
return false
}
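The FLAG_ANY note above comes down to bitmask arithmetic: a zero-valued flag satisfies u & flag == flag for every u, so putting it in the usecase map would make it match every HasUsecases check. A tiny self-contained sketch (the flag values are illustrative, not LocalAI's real constants):
package main

import "fmt"

type usecase int

// Illustrative flag values only.
const (
	flagAny  usecase = 0
	flagChat usecase = 1 << iota // 2
	flagTTS                      // 4
)

func has(u, flag usecase) bool { return u&flag == flag }

func main() {
	u := flagTTS
	fmt.Println(has(u, flagChat)) // false
	fmt.Println(has(u, flagTTS))  // true
	fmt.Println(has(u, flagAny))  // true for any u, because flagAny is 0
}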

View File

@@ -169,7 +169,7 @@ func (bcl *ModelConfigLoader) LoadMultipleModelConfigsSingleFile(file string, op
}
for _, cc := range c {
if valid, _ := cc.Validate(); valid {
if cc.Validate() {
bcl.configs[cc.Name] = *cc
}
}
@@ -184,7 +184,7 @@ func (bcl *ModelConfigLoader) ReadModelConfig(file string, opts ...ConfigLoaderO
return fmt.Errorf("ReadModelConfig cannot read config file %q: %w", file, err)
}
if valid, _ := c.Validate(); valid {
if c.Validate() {
bcl.configs[c.Name] = *c
} else {
return fmt.Errorf("config is not valid")
@@ -362,7 +362,7 @@ func (bcl *ModelConfigLoader) LoadModelConfigsFromPath(path string, opts ...Conf
log.Error().Err(err).Str("File Name", file.Name()).Msgf("LoadModelConfigsFromPath cannot read config file")
continue
}
if valid, _ := c.Validate(); valid {
if c.Validate() {
bcl.configs[c.Name] = *c
} else {
log.Error().Err(err).Str("Name", c.Name).Msgf("config is not valid")

View File

@@ -28,9 +28,7 @@ known_usecases:
config, err := readModelConfigFromFile(tmp.Name())
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
valid, err := config.Validate()
Expect(err).To(HaveOccurred())
Expect(valid).To(BeFalse())
Expect(config.Validate()).To(BeFalse())
Expect(config.KnownUsecases).ToNot(BeNil())
})
It("Test Validate", func() {
@@ -48,9 +46,7 @@ parameters:
Expect(config).ToNot(BeNil())
// two configs in config.yaml
Expect(config.Name).To(Equal("bar-baz"))
valid, err := config.Validate()
Expect(err).To(BeNil())
Expect(valid).To(BeTrue())
Expect(config.Validate()).To(BeTrue())
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
httpClient := http.Client{}
@@ -67,9 +63,7 @@ parameters:
Expect(config).ToNot(BeNil())
// two configs in config.yaml
Expect(config.Name).To(Equal("hermes-2-pro-mistral"))
valid, err = config.Validate()
Expect(err).To(BeNil())
Expect(valid).To(BeTrue())
Expect(config.Validate()).To(BeTrue())
})
})
It("Properly handles backend usecase matching", func() {

View File

@@ -164,7 +164,7 @@ func InstallBackend(ctx context.Context, systemState *system.SystemState, modelL
return fmt.Errorf("failed copying: %w", err)
}
} else {
log.Debug().Str("uri", config.URI).Str("backendPath", backendPath).Msg("Downloading backend")
uri := downloader.URI(config.URI)
if err := uri.DownloadFileWithContext(ctx, backendPath, "", 1, 1, downloadStatus); err != nil {
success := false
// Try to download from mirrors
@@ -177,27 +177,16 @@ func InstallBackend(ctx context.Context, systemState *system.SystemState, modelL
}
if err := downloader.URI(mirror).DownloadFileWithContext(ctx, backendPath, "", 1, 1, downloadStatus); err == nil {
success = true
log.Debug().Str("uri", config.URI).Str("backendPath", backendPath).Msg("Downloaded backend")
break
}
}
if !success {
log.Error().Str("uri", config.URI).Str("backendPath", backendPath).Err(err).Msg("Failed to download backend")
return fmt.Errorf("failed to download backend %q: %v", config.URI, err)
}
} else {
log.Debug().Str("uri", config.URI).Str("backendPath", backendPath).Msg("Downloaded backend")
}
}
// sanity check - check if runfile is present
runFile := filepath.Join(backendPath, runFile)
if _, err := os.Stat(runFile); os.IsNotExist(err) {
log.Error().Str("runFile", runFile).Msg("Run file not found")
return fmt.Errorf("not a valid backend: run file not found %q", runFile)
}
// Create metadata for the backend
metadata := &BackendMetadata{
Name: name,

View File

@@ -563,8 +563,8 @@ var _ = Describe("Gallery Backends", func() {
)
Expect(err).NotTo(HaveOccurred())
err = InstallBackend(context.TODO(), systemState, ml, &backend, nil)
Expect(newPath).To(BeADirectory())
Expect(err).To(HaveOccurred()) // Will fail due to invalid URI, but path should be created
Expect(newPath).To(BeADirectory())
})
It("should overwrite existing backend", func() {

View File

@@ -6,13 +6,11 @@ import (
"os"
"path/filepath"
"strings"
"time"
"github.com/lithammer/fuzzysearch/fuzzy"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/xsync"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
@@ -21,7 +19,7 @@ import (
func GetGalleryConfigFromURL[T any](url string, basePath string) (T, error) {
var config T
uri := downloader.URI(url)
err := uri.ReadWithCallback(basePath, func(url string, d []byte) error {
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &config)
})
if err != nil {
@@ -34,7 +32,7 @@ func GetGalleryConfigFromURL[T any](url string, basePath string) (T, error) {
func GetGalleryConfigFromURLWithContext[T any](ctx context.Context, url string, basePath string) (T, error) {
var config T
uri := downloader.URI(url)
err := uri.ReadWithAuthorizationAndCallback(ctx, basePath, "", func(url string, d []byte) error {
err := uri.DownloadWithAuthorizationAndCallback(ctx, basePath, "", func(url string, d []byte) error {
return yaml.Unmarshal(d, &config)
})
if err != nil {
@@ -143,7 +141,7 @@ func AvailableGalleryModels(galleries []config.Gallery, systemState *system.Syst
// Get models from galleries
for _, gallery := range galleries {
galleryModels, err := getGalleryElements(gallery, systemState.Model.ModelsPath, func(model *GalleryModel) bool {
galleryModels, err := getGalleryElements[*GalleryModel](gallery, systemState.Model.ModelsPath, func(model *GalleryModel) bool {
if _, err := os.Stat(filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", model.GetName()))); err == nil {
return true
}
@@ -184,7 +182,7 @@ func AvailableBackends(galleries []config.Gallery, systemState *system.SystemSta
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
var refFile string
uri := downloader.URI(url)
err := uri.ReadWithCallback(basePath, func(url string, d []byte) error {
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
refFile = string(d)
if len(refFile) == 0 {
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -196,17 +194,6 @@ func findGalleryURLFromReferenceURL(url string, basePath string) (string, error)
return refFile, err
}
type galleryCacheEntry struct {
yamlEntry []byte
lastUpdated time.Time
}
func (entry galleryCacheEntry) hasExpired() bool {
return entry.lastUpdated.Before(time.Now().Add(-1 * time.Hour))
}
var galleryCache = xsync.NewSyncedMap[string, galleryCacheEntry]()
func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath string, isInstalledCallback func(T) bool) ([]T, error) {
var models []T = []T{}
@@ -217,37 +204,16 @@ func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath strin
return models, err
}
}
cacheKey := fmt.Sprintf("%s-%s", gallery.Name, gallery.URL)
if galleryCache.Exists(cacheKey) {
entry := galleryCache.Get(cacheKey)
// refresh if last updated is more than 1 hour ago
if !entry.hasExpired() {
err := yaml.Unmarshal(entry.yamlEntry, &models)
if err != nil {
return models, err
}
} else {
galleryCache.Delete(cacheKey)
}
}
uri := downloader.URI(gallery.URL)
if len(models) == 0 {
err := uri.ReadWithCallback(basePath, func(url string, d []byte) error {
galleryCache.Set(cacheKey, galleryCacheEntry{
yamlEntry: d,
lastUpdated: time.Now(),
})
return yaml.Unmarshal(d, &models)
})
if err != nil {
if yamlErr, ok := err.(*yaml.TypeError); ok {
log.Debug().Msgf("YAML errors: %s\n\nwreckage of models: %+v", strings.Join(yamlErr.Errors, "\n"), models)
}
return models, fmt.Errorf("failed to read gallery elements: %w", err)
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &models)
})
if err != nil {
if yamlErr, ok := err.(*yaml.TypeError); ok {
log.Debug().Msgf("YAML errors: %s\n\nwreckage of models: %+v", strings.Join(yamlErr.Errors, "\n"), models)
}
return models, err
}
// Add gallery to models

View File

@@ -1,121 +0,0 @@
package importers
import (
"encoding/json"
"path/filepath"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"gopkg.in/yaml.v3"
)
var _ Importer = &DiffuserImporter{}
type DiffuserImporter struct{}
func (i *DiffuserImporter) Match(details Details) bool {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return false
}
preferencesMap := make(map[string]any)
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return false
}
b, ok := preferencesMap["backend"].(string)
if ok && b == "diffusers" {
return true
}
if details.HuggingFace != nil {
for _, file := range details.HuggingFace.Files {
if strings.Contains(file.Path, "model_index.json") ||
strings.Contains(file.Path, "scheduler/scheduler_config.json") {
return true
}
}
}
return false
}
func (i *DiffuserImporter) Import(details Details) (gallery.ModelConfig, error) {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return gallery.ModelConfig{}, err
}
preferencesMap := make(map[string]any)
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return gallery.ModelConfig{}, err
}
name, ok := preferencesMap["name"].(string)
if !ok {
name = filepath.Base(details.URI)
}
description, ok := preferencesMap["description"].(string)
if !ok {
description = "Imported from " + details.URI
}
backend := "diffusers"
b, ok := preferencesMap["backend"].(string)
if ok {
backend = b
}
pipelineType, ok := preferencesMap["pipeline_type"].(string)
if !ok {
pipelineType = "StableDiffusionPipeline"
}
schedulerType, ok := preferencesMap["scheduler_type"].(string)
if !ok {
schedulerType = ""
}
enableParameters, ok := preferencesMap["enable_parameters"].(string)
if !ok {
enableParameters = "negative_prompt,num_inference_steps"
}
cuda := false
if cudaVal, ok := preferencesMap["cuda"].(bool); ok {
cuda = cudaVal
}
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"image"},
Backend: backend,
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: details.URI,
},
},
Diffusers: config.Diffusers{
PipelineType: pipelineType,
SchedulerType: schedulerType,
EnableParameters: enableParameters,
CUDA: cuda,
},
}
data, err := yaml.Marshal(modelConfig)
if err != nil {
return gallery.ModelConfig{}, err
}
return gallery.ModelConfig{
Name: name,
Description: description,
ConfigFile: string(data),
}, nil
}

View File

@@ -1,246 +0,0 @@
package importers_test
import (
"encoding/json"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/mudler/LocalAI/core/gallery/importers"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("DiffuserImporter", func() {
var importer *DiffuserImporter
BeforeEach(func() {
importer = &DiffuserImporter{}
})
Context("Match", func() {
It("should match when backend preference is diffusers", func() {
preferences := json.RawMessage(`{"backend": "diffusers"}`)
details := Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should match when HuggingFace details contain model_index.json", func() {
hfDetails := &hfapi.ModelDetails{
Files: []hfapi.ModelFile{
{Path: "model_index.json"},
},
}
details := Details{
URI: "https://huggingface.co/test/model",
HuggingFace: hfDetails,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should match when HuggingFace details contain scheduler config", func() {
hfDetails := &hfapi.ModelDetails{
Files: []hfapi.ModelFile{
{Path: "scheduler/scheduler_config.json"},
},
}
details := Details{
URI: "https://huggingface.co/test/model",
HuggingFace: hfDetails,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should not match when URI has no diffuser files and no backend preference", func() {
details := Details{
URI: "https://example.com/model.bin",
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
It("should not match when backend preference is different", func() {
preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
details := Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
It("should return false when JSON preferences are invalid", func() {
preferences := json.RawMessage(`invalid json`)
details := Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
})
Context("Import", func() {
It("should import model config with default name and description", func() {
details := Details{
URI: "https://huggingface.co/test/my-diffuser-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("my-diffuser-model"))
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/test/my-diffuser-model"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: diffusers"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: https://huggingface.co/test/my-diffuser-model"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusionPipeline"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("enable_parameters: negative_prompt,num_inference_steps"))
})
It("should import model config with custom name and description from preferences", func() {
preferences := json.RawMessage(`{"name": "custom-diffuser", "description": "Custom diffuser model"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("custom-diffuser"))
Expect(modelConfig.Description).To(Equal("Custom diffuser model"))
})
It("should use custom pipeline_type from preferences", func() {
preferences := json.RawMessage(`{"pipeline_type": "StableDiffusion3Pipeline"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusion3Pipeline"))
})
It("should use default pipeline_type when not specified", func() {
details := Details{
URI: "https://huggingface.co/test/my-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusionPipeline"))
})
It("should use custom scheduler_type from preferences", func() {
preferences := json.RawMessage(`{"scheduler_type": "k_dpmpp_2m"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("scheduler_type: k_dpmpp_2m"))
})
It("should use cuda setting from preferences", func() {
preferences := json.RawMessage(`{"cuda": true}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("cuda: true"))
})
It("should use custom enable_parameters from preferences", func() {
preferences := json.RawMessage(`{"enable_parameters": "num_inference_steps,guidance_scale"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("enable_parameters: num_inference_steps,guidance_scale"))
})
It("should use custom backend from preferences", func() {
preferences := json.RawMessage(`{"backend": "diffusers"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: diffusers"))
})
It("should handle invalid JSON preferences", func() {
preferences := json.RawMessage(`invalid json`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
_, err := importer.Import(details)
Expect(err).To(HaveOccurred())
})
It("should extract filename correctly from URI with path", func() {
details := importers.Details{
URI: "https://huggingface.co/test/path/to/model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("model"))
})
It("should include known_usecases as image in config", func() {
details := Details{
URI: "https://huggingface.co/test/my-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("known_usecases:"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("- image"))
})
It("should include diffusers configuration in config", func() {
details := Details{
URI: "https://huggingface.co/test/my-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("diffusers:"))
})
})
})

View File

@@ -2,16 +2,11 @@ package importers
import (
"encoding/json"
"fmt"
"os"
"strings"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
@@ -20,7 +15,6 @@ var defaultImporters = []Importer{
&MLXImporter{},
&VLLMImporter{},
&TransformersImporter{},
&DiffuserImporter{},
}
type Details struct {
@@ -34,10 +28,6 @@ type Importer interface {
Import(details Details) (gallery.ModelConfig, error)
}
func hasYAMLExtension(uri string) bool {
return strings.HasSuffix(uri, ".yaml") || strings.HasSuffix(uri, ".yml")
}
func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.ModelConfig, error) {
var err error
var modelConfig gallery.ModelConfig
@@ -52,61 +42,20 @@ func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.Model
if err != nil {
// maybe not a HF repository
// TODO: maybe we can check if the URI is a valid HF repository
log.Debug().Str("uri", uri).Str("hfrepoID", hfrepoID).Msg("Failed to get model details, maybe not a HF repository")
log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
} else {
log.Debug().Str("uri", uri).Msg("Got model details")
log.Debug().Any("details", hfDetails).Msg("Model details")
}
// handle local config files ("/my-model.yaml" or "file://my-model.yaml")
localURI := uri
if strings.HasPrefix(uri, downloader.LocalPrefix) {
localURI = strings.TrimPrefix(uri, downloader.LocalPrefix)
}
// if a file exists or it's a URL that ends with .yaml or .yml, read the config file directly
if _, e := os.Stat(localURI); hasYAMLExtension(localURI) && (e == nil || downloader.URI(localURI).LooksLikeURL()) {
var modelYAML []byte
if downloader.URI(localURI).LooksLikeURL() {
err := downloader.URI(localURI).ReadWithCallback(localURI, func(url string, i []byte) error {
modelYAML = i
return nil
})
if err != nil {
log.Error().Err(err).Str("filepath", localURI).Msg("error reading model definition")
return gallery.ModelConfig{}, err
}
} else {
modelYAML, err = os.ReadFile(localURI)
if err != nil {
log.Error().Err(err).Str("filepath", localURI).Msg("error reading model definition")
return gallery.ModelConfig{}, err
}
}
var modelConfig config.ModelConfig
if e := yaml.Unmarshal(modelYAML, &modelConfig); e != nil {
return gallery.ModelConfig{}, e
}
configFile, err := yaml.Marshal(modelConfig)
return gallery.ModelConfig{
Description: modelConfig.Description,
Name: modelConfig.Name,
ConfigFile: string(configFile),
}, err
}
details := Details{
HuggingFace: hfDetails,
URI: uri,
Preferences: preferences,
}
importerMatched := false
for _, importer := range defaultImporters {
if importer.Match(details) {
importerMatched = true
modelConfig, err = importer.Import(details)
if err != nil {
continue
@@ -114,8 +63,5 @@ func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.Model
break
}
}
if !importerMatched {
return gallery.ModelConfig{}, fmt.Errorf("no importer matched for %s", uri)
}
return modelConfig, nil
return modelConfig, err
}

View File

@@ -3,8 +3,6 @@ package importers_test
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
@@ -214,139 +212,4 @@ var _ = Describe("DiscoverModelConfig", func() {
Expect(modelConfig.Name).To(BeEmpty())
})
})
Context("with local YAML config files", func() {
var tempDir string
BeforeEach(func() {
var err error
tempDir, err = os.MkdirTemp("", "importers-test-*")
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
os.RemoveAll(tempDir)
})
It("should read local YAML file with file:// prefix", func() {
yamlContent := `name: test-model
backend: llama-cpp
description: Test model from local YAML
parameters:
model: /path/to/model.gguf
temperature: 0.7
`
yamlFile := filepath.Join(tempDir, "test-model.yaml")
err := os.WriteFile(yamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
uri := "file://" + yamlFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("test-model"))
Expect(modelConfig.Description).To(Equal("Test model from local YAML"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("name: test-model"))
})
It("should read local YAML file without file:// prefix (direct path)", func() {
yamlContent := `name: direct-path-model
backend: mlx
description: Test model from direct path
parameters:
model: /path/to/model.safetensors
`
yamlFile := filepath.Join(tempDir, "direct-model.yaml")
err := os.WriteFile(yamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
uri := yamlFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("direct-path-model"))
Expect(modelConfig.Description).To(Equal("Test model from direct path"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
})
It("should read local YAML file with .yml extension", func() {
yamlContent := `name: yml-extension-model
backend: transformers
description: Test model with .yml extension
parameters:
model: /path/to/model
`
yamlFile := filepath.Join(tempDir, "test-model.yml")
err := os.WriteFile(yamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
uri := "file://" + yamlFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("yml-extension-model"))
Expect(modelConfig.Description).To(Equal("Test model with .yml extension"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: transformers"))
})
It("should ignore preferences when reading YAML files directly", func() {
yamlContent := `name: yaml-model
backend: llama-cpp
description: Original description
parameters:
model: /path/to/model.gguf
`
yamlFile := filepath.Join(tempDir, "prefs-test.yaml")
err := os.WriteFile(yamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
uri := "file://" + yamlFile
// Preferences should be ignored when reading YAML directly
preferences := json.RawMessage(`{"name": "custom-name", "description": "Custom description", "backend": "mlx"}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
// Should use values from YAML file, not preferences
Expect(modelConfig.Name).To(Equal("yaml-model"))
Expect(modelConfig.Description).To(Equal("Original description"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
})
It("should return error when local YAML file doesn't exist", func() {
nonExistentFile := filepath.Join(tempDir, "nonexistent.yaml")
uri := "file://" + nonExistentFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).To(HaveOccurred())
Expect(modelConfig.Name).To(BeEmpty())
})
It("should return error when YAML file is invalid/malformed", func() {
invalidYaml := `name: invalid-model
backend: llama-cpp
invalid: yaml: content: [unclosed bracket
`
yamlFile := filepath.Join(tempDir, "invalid.yaml")
err := os.WriteFile(yamlFile, []byte(invalidYaml), 0644)
Expect(err).ToNot(HaveOccurred())
uri := "file://" + yamlFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).To(HaveOccurred())
Expect(modelConfig.Name).To(BeEmpty())
})
})
})

View File

@@ -9,9 +9,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/rs/zerolog/log"
"go.yaml.in/yaml/v2"
)
@@ -22,22 +20,14 @@ type LlamaCPPImporter struct{}
func (i *LlamaCPPImporter) Match(details Details) bool {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
log.Error().Err(err).Msg("failed to marshal preferences")
return false
}
preferencesMap := make(map[string]any)
if len(preferences) > 0 {
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
log.Error().Err(err).Msg("failed to unmarshal preferences")
return false
}
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return false
}
uri := downloader.URI(details.URI)
if preferencesMap["backend"] == "llama-cpp" {
return true
}
@@ -46,10 +36,6 @@ func (i *LlamaCPPImporter) Match(details Details) bool {
return true
}
if uri.LooksLikeOCI() {
return true
}
if details.HuggingFace != nil {
for _, file := range details.HuggingFace.Files {
if strings.HasSuffix(file.Path, ".gguf") {
@@ -62,19 +48,14 @@ func (i *LlamaCPPImporter) Match(details Details) bool {
}
func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
log.Debug().Str("uri", details.URI).Msg("llama.cpp importer matched")
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return gallery.ModelConfig{}, err
}
preferencesMap := make(map[string]any)
if len(preferences) > 0 {
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return gallery.ModelConfig{}, err
}
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return gallery.ModelConfig{}, err
}
name, ok := preferencesMap["name"].(string)
@@ -127,40 +108,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
Description: description,
}
uri := downloader.URI(details.URI)
switch {
case uri.LooksLikeOCI():
ociName := strings.TrimPrefix(string(uri), downloader.OCIPrefix)
ociName = strings.TrimPrefix(ociName, downloader.OllamaPrefix)
ociName = strings.ReplaceAll(ociName, "/", "__")
ociName = strings.ReplaceAll(ociName, ":", "__")
cfg.Files = append(cfg.Files, gallery.File{
URI: details.URI,
Filename: ociName,
})
modelConfig.PredictionOptions = schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: ociName,
},
}
case uri.LooksLikeURL() && strings.HasSuffix(details.URI, ".gguf"):
// Extract filename from URL
fileName, e := uri.FilenameFromUrl()
if e != nil {
return gallery.ModelConfig{}, e
}
cfg.Files = append(cfg.Files, gallery.File{
URI: details.URI,
Filename: fileName,
})
modelConfig.PredictionOptions = schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: fileName,
},
}
case strings.HasSuffix(details.URI, ".gguf"):
if strings.HasSuffix(details.URI, ".gguf") {
cfg.Files = append(cfg.Files, gallery.File{
URI: details.URI,
Filename: filepath.Base(details.URI),
@@ -170,7 +118,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
Model: filepath.Base(details.URI),
},
}
case details.HuggingFace != nil:
} else if details.HuggingFace != nil {
// We want to:
// Get first the chosen quants that match filenames
// OR the first mmproj/gguf file found
@@ -247,6 +195,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
}
break
}
}
data, err := yaml.Marshal(modelConfig)
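
The matching rules for the llama.cpp importer shown above can be condensed into a small standalone helper. This is only a sketch of the preference and file-extension checks and leaves out the HuggingFace file-listing path:

package main

import (
    "encoding/json"
    "fmt"
    "strings"
)

// matchLlamaCPP condenses the checks above: an explicit "backend":
// "llama-cpp" preference wins immediately, otherwise a URI ending in
// .gguf is taken as a llama.cpp model.
func matchLlamaCPP(uri string, preferences json.RawMessage) (bool, error) {
    prefs := map[string]any{}
    if len(preferences) > 0 {
        if err := json.Unmarshal(preferences, &prefs); err != nil {
            return false, err
        }
    }
    if prefs["backend"] == "llama-cpp" {
        return true, nil
    }
    return strings.HasSuffix(uri, ".gguf"), nil
}

func main() {
    ok, err := matchLlamaCPP("https://example.com/model-q4.gguf", json.RawMessage(`{}`))
    fmt.Println(ok, err) // true <nil>
}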

View File

@@ -6,10 +6,10 @@ import (
"fmt"
"os"
"path/filepath"
"slices"
"strings"
"dario.cat/mergo"
"github.com/mudler/LocalAI/core/config"
lconfig "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
@@ -17,7 +17,7 @@ import (
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
"gopkg.in/yaml.v2"
)
/*
@@ -74,7 +74,7 @@ type PromptTemplate struct {
// Installs a model from the gallery
func InstallModelFromGallery(
ctx context.Context,
modelGalleries, backendGalleries []lconfig.Gallery,
modelGalleries, backendGalleries []config.Gallery,
systemState *system.SystemState,
modelLoader *model.ModelLoader,
name string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan, automaticallyInstallBackend bool) error {
@@ -260,8 +260,8 @@ func InstallModel(ctx context.Context, systemState *system.SystemState, nameOver
return nil, fmt.Errorf("failed to unmarshal updated config YAML: %v", err)
}
if valid, err := modelConfig.Validate(); !valid {
return nil, fmt.Errorf("failed to validate updated config YAML: %v", err)
if !modelConfig.Validate() {
return nil, fmt.Errorf("failed to validate updated config YAML")
}
err = os.WriteFile(configFilePath, updatedConfigYAML, 0600)
@@ -294,32 +294,21 @@ func GetLocalModelConfiguration(basePath string, name string) (*ModelConfig, err
return ReadConfigFile[ModelConfig](galleryFile)
}
func listModelFiles(systemState *system.SystemState, name string) ([]string, error) {
func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
additionalFiles := []string{}
configFile := filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", name))
if err := utils.VerifyPath(configFile, systemState.Model.ModelsPath); err != nil {
return nil, fmt.Errorf("failed to verify path %s: %w", configFile, err)
return fmt.Errorf("failed to verify path %s: %w", configFile, err)
}
// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
galleryFile := filepath.Join(systemState.Model.ModelsPath, galleryFileName(name))
if err := utils.VerifyPath(galleryFile, systemState.Model.ModelsPath); err != nil {
return nil, fmt.Errorf("failed to verify path %s: %w", galleryFile, err)
}
additionalFiles := []string{}
allFiles := []string{}
// Gallery name is the name of the model in this case
dat, err := os.ReadFile(configFile)
if err == nil {
modelConfig := &lconfig.ModelConfig{}
modelConfig := &config.ModelConfig{}
err = yaml.Unmarshal(dat, &modelConfig)
if err != nil {
return nil, err
return err
}
if modelConfig.Model != "" {
additionalFiles = append(additionalFiles, modelConfig.ModelFileName())
@@ -330,15 +319,26 @@ func listModelFiles(systemState *system.SystemState, name string) ([]string, err
}
}
// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
galleryFile := filepath.Join(systemState.Model.ModelsPath, galleryFileName(name))
if err := utils.VerifyPath(galleryFile, systemState.Model.ModelsPath); err != nil {
return fmt.Errorf("failed to verify path %s: %w", galleryFile, err)
}
var filesToRemove []string
// Delete all the files associated to the model
// read the model config
galleryconfig, err := ReadConfigFile[ModelConfig](galleryFile)
if err == nil && galleryconfig != nil {
for _, f := range galleryconfig.Files {
fullPath := filepath.Join(systemState.Model.ModelsPath, f.Filename)
if err := utils.VerifyPath(fullPath, systemState.Model.ModelsPath); err != nil {
return allFiles, fmt.Errorf("failed to verify path %s: %w", fullPath, err)
return fmt.Errorf("failed to verify path %s: %w", fullPath, err)
}
allFiles = append(allFiles, fullPath)
filesToRemove = append(filesToRemove, fullPath)
}
} else {
log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
@@ -347,68 +347,18 @@ func listModelFiles(systemState *system.SystemState, name string) ([]string, err
for _, f := range additionalFiles {
fullPath := filepath.Join(filepath.Join(systemState.Model.ModelsPath, f))
if err := utils.VerifyPath(fullPath, systemState.Model.ModelsPath); err != nil {
return allFiles, fmt.Errorf("failed to verify path %s: %w", fullPath, err)
return fmt.Errorf("failed to verify path %s: %w", fullPath, err)
}
allFiles = append(allFiles, fullPath)
filesToRemove = append(filesToRemove, fullPath)
}
allFiles = append(allFiles, galleryFile)
filesToRemove = append(filesToRemove, galleryFile)
// skip duplicates
allFiles = utils.Unique(allFiles)
return allFiles, nil
}
func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
configFile := filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", name))
filesToRemove, err := listModelFiles(systemState, name)
if err != nil {
return err
}
allOtherFiles := []string{}
// Get all files of all other models
fi, err := os.ReadDir(systemState.Model.ModelsPath)
if err != nil {
return err
}
for _, f := range fi {
if f.IsDir() {
continue
}
if strings.HasPrefix(f.Name(), "._gallery_") {
continue
}
if !strings.HasSuffix(f.Name(), ".yaml") && !strings.HasSuffix(f.Name(), ".yml") {
continue
}
if f.Name() == fmt.Sprintf("%s.yaml", name) || f.Name() == fmt.Sprintf("%s.yml", name) {
continue
}
name := strings.TrimSuffix(f.Name(), ".yaml")
name = strings.TrimSuffix(name, ".yml")
log.Debug().Msgf("Checking file %s", f.Name())
files, err := listModelFiles(systemState, name)
if err != nil {
log.Debug().Err(err).Msgf("failed to list files for model %s", f.Name())
continue
}
allOtherFiles = append(allOtherFiles, files...)
}
log.Debug().Msgf("Files to remove: %+v", filesToRemove)
log.Debug().Msgf("All other files: %+v", allOtherFiles)
filesToRemove = utils.Unique(filesToRemove)
// Removing files
for _, f := range filesToRemove {
if slices.Contains(allOtherFiles, f) {
log.Debug().Msgf("Skipping file %s because it is part of another model", f)
continue
}
if e := os.Remove(f); e != nil {
log.Error().Err(e).Msgf("failed to remove file %s", f)
}
@@ -419,7 +369,7 @@ func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
// This is ***NEVER*** going to be perfect or finished.
// This is a BEST EFFORT function to surface known-vulnerable models to users.
func SafetyScanGalleryModels(galleries []lconfig.Gallery, systemState *system.SystemState) error {
func SafetyScanGalleryModels(galleries []config.Gallery, systemState *system.SystemState) error {
galleryModels, err := AvailableGalleryModels(galleries, systemState)
if err != nil {
return err
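
The shared-file handling visible in the deletion code above (files still referenced by another model's config are skipped) follows a simple set-difference pattern. A minimal sketch with hypothetical helper names, not the actual gallery API:

package main

import "fmt"

// filesSafeToRemove keeps a file owned by the model being deleted only
// when no other installed model still references it.
func filesSafeToRemove(toRemove, usedByOthers []string) []string {
    inUse := map[string]struct{}{}
    for _, f := range usedByOthers {
        inUse[f] = struct{}{}
    }
    out := []string{}
    for _, f := range toRemove {
        if _, shared := inUse[f]; shared {
            continue // keep shared files on disk
        }
        out = append(out, f)
    }
    return out
}

func main() {
    fmt.Println(filesSafeToRemove(
        []string{"model1.yaml", "shared_model.bin"},
        []string{"shared_model.bin"},
    )) // [model1.yaml]
}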

View File

@@ -183,98 +183,5 @@ var _ = Describe("Model test", func() {
_, err = InstallModel(context.TODO(), systemState, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
Expect(err).To(HaveOccurred())
})
It("does not delete shared model files when one config is deleted", func() {
tempdir, err := os.MkdirTemp("", "test")
Expect(err).ToNot(HaveOccurred())
defer os.RemoveAll(tempdir)
systemState, err := system.GetSystemState(
system.WithModelPath(tempdir),
)
Expect(err).ToNot(HaveOccurred())
// Create a shared model file
sharedModelFile := filepath.Join(tempdir, "shared_model.bin")
err = os.WriteFile(sharedModelFile, []byte("fake model content"), 0600)
Expect(err).ToNot(HaveOccurred())
// Create first model configuration
config1 := `name: model1
model: shared_model.bin`
err = os.WriteFile(filepath.Join(tempdir, "model1.yaml"), []byte(config1), 0600)
Expect(err).ToNot(HaveOccurred())
// Create first model's gallery file
galleryConfig1 := ModelConfig{
Name: "model1",
Files: []File{
{Filename: "shared_model.bin"},
},
}
galleryData1, err := yaml.Marshal(galleryConfig1)
Expect(err).ToNot(HaveOccurred())
err = os.WriteFile(filepath.Join(tempdir, "._gallery_model1.yaml"), galleryData1, 0600)
Expect(err).ToNot(HaveOccurred())
// Create second model configuration sharing the same model file
config2 := `name: model2
model: shared_model.bin`
err = os.WriteFile(filepath.Join(tempdir, "model2.yaml"), []byte(config2), 0600)
Expect(err).ToNot(HaveOccurred())
// Create second model's gallery file
galleryConfig2 := ModelConfig{
Name: "model2",
Files: []File{
{Filename: "shared_model.bin"},
},
}
galleryData2, err := yaml.Marshal(galleryConfig2)
Expect(err).ToNot(HaveOccurred())
err = os.WriteFile(filepath.Join(tempdir, "._gallery_model2.yaml"), galleryData2, 0600)
Expect(err).ToNot(HaveOccurred())
// Verify both configurations exist
_, err = os.Stat(filepath.Join(tempdir, "model1.yaml"))
Expect(err).ToNot(HaveOccurred())
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
Expect(err).ToNot(HaveOccurred())
// Verify the shared model file exists
_, err = os.Stat(sharedModelFile)
Expect(err).ToNot(HaveOccurred())
// Delete the first model
err = DeleteModelFromSystem(systemState, "model1")
Expect(err).ToNot(HaveOccurred())
// Verify the first configuration is deleted
_, err = os.Stat(filepath.Join(tempdir, "model1.yaml"))
Expect(err).To(HaveOccurred())
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
// Verify the shared model file still exists (not deleted because model2 still uses it)
_, err = os.Stat(sharedModelFile)
Expect(err).ToNot(HaveOccurred(), "shared model file should not be deleted when used by other configs")
// Verify the second configuration still exists
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
Expect(err).ToNot(HaveOccurred())
// Now delete the second model
err = DeleteModelFromSystem(systemState, "model2")
Expect(err).ToNot(HaveOccurred())
// Verify the second configuration is deleted
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
Expect(err).To(HaveOccurred())
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
// Verify the shared model file is now deleted (no more references)
_, err = os.Stat(sharedModelFile)
Expect(err).To(HaveOccurred(), "shared model file should be deleted when no configs reference it")
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
})
})
})

View File

@@ -208,7 +208,7 @@ func API(application *application.Application) (*echo.Echo, error) {
routes.RegisterLocalAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application.TemplatesEvaluator())
routes.RegisterOpenAIRoutes(e, requestExtractor, application)
if !application.ApplicationConfig().DisableWebUI {
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application)
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
}
routes.RegisterJINARoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())

View File

@@ -87,7 +87,7 @@ func getModels(url string) ([]gallery.GalleryModel, error) {
response := []gallery.GalleryModel{}
uri := downloader.URI(url)
// TODO: No tests currently seem to exercise file:// urls. Fix?
err := uri.ReadWithAuthorizationAndCallback(context.TODO(), "", bearerKey, func(url string, i []byte) error {
err := uri.DownloadWithAuthorizationAndCallback(context.TODO(), "", bearerKey, func(url string, i []byte) error {
// Unmarshal YAML data into a struct
return json.Unmarshal(i, &response)
})
@@ -513,124 +513,6 @@ var _ = Describe("API test", func() {
})
})
Context("Importing models from URI", func() {
var testYamlFile string
BeforeEach(func() {
// Create a test YAML config file
yamlContent := `name: test-import-model
backend: llama-cpp
description: Test model imported from file URI
parameters:
model: path/to/model.gguf
temperature: 0.7
`
testYamlFile = filepath.Join(tmpdir, "test-import.yaml")
err := os.WriteFile(testYamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
err := os.Remove(testYamlFile)
Expect(err).ToNot(HaveOccurred())
})
It("should import model from file:// URI pointing to local YAML config", func() {
importReq := schema.ImportModelRequest{
URI: "file://" + testYamlFile,
Preferences: json.RawMessage(`{}`),
}
var response schema.GalleryResponse
err := postRequestResponseJSON("http://127.0.0.1:9090/models/import-uri", &importReq, &response)
Expect(err).ToNot(HaveOccurred())
Expect(response.ID).ToNot(BeEmpty())
uuid := response.ID
resp := map[string]interface{}{}
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
resp = response
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
// Check that the model was imported successfully
Expect(resp["message"]).ToNot(ContainSubstring("error"))
Expect(resp["error"]).To(BeNil())
// Verify the model config file was created
dat, err := os.ReadFile(filepath.Join(modelDir, "test-import-model.yaml"))
Expect(err).ToNot(HaveOccurred())
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["name"]).To(Equal("test-import-model"))
Expect(content["backend"]).To(Equal("llama-cpp"))
})
It("should return error when file:// URI points to non-existent file", func() {
nonExistentFile := filepath.Join(tmpdir, "nonexistent.yaml")
importReq := schema.ImportModelRequest{
URI: "file://" + nonExistentFile,
Preferences: json.RawMessage(`{}`),
}
var response schema.GalleryResponse
err := postRequestResponseJSON("http://127.0.0.1:9090/models/import-uri", &importReq, &response)
// The endpoint should return an error immediately
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("failed to discover model config"))
})
})
Context("Importing models from URI can't point to absolute paths", func() {
var testYamlFile string
BeforeEach(func() {
// Create a test YAML config file
yamlContent := `name: test-import-model
backend: llama-cpp
description: Test model imported from file URI
parameters:
model: /path/to/model.gguf
temperature: 0.7
`
testYamlFile = filepath.Join(tmpdir, "test-import.yaml")
err := os.WriteFile(testYamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
err := os.Remove(testYamlFile)
Expect(err).ToNot(HaveOccurred())
})
It("should fail to import model from file:// URI pointing to local YAML config", func() {
importReq := schema.ImportModelRequest{
URI: "file://" + testYamlFile,
Preferences: json.RawMessage(`{}`),
}
var response schema.GalleryResponse
err := postRequestResponseJSON("http://127.0.0.1:9090/models/import-uri", &importReq, &response)
Expect(err).ToNot(HaveOccurred())
Expect(response.ID).ToNot(BeEmpty())
uuid := response.ID
resp := map[string]interface{}{}
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
resp = response
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
// Check that the model was imported successfully
Expect(resp["message"]).To(ContainSubstring("error"))
Expect(resp["error"]).ToNot(BeNil())
})
})
})
Context("Model gallery", func() {
@@ -1199,9 +1081,6 @@ parameters:
Context("Config file", func() {
BeforeEach(func() {
if runtime.GOOS != "linux" {
Skip("run this test only on linux")
}
modelPath := os.Getenv("MODELS_PATH")
backendPath := os.Getenv("BACKENDS_PATH")
c, cancel = context.WithCancel(context.Background())
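
Outside of the Ginkgo suite, the same import flow exercised above can be driven with a plain HTTP client. A rough sketch follows; the JSON field names are assumed from the request/response structs used in the tests and may differ in the actual API:

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    // Assumed field names ("uri", "preferences"), based on the
    // ImportModelRequest struct used in the tests above.
    body, _ := json.Marshal(map[string]any{
        "uri":         "file:///tmp/test-import.yaml",
        "preferences": map[string]any{},
    })

    resp, err := http.Post("http://127.0.0.1:9090/models/import-uri",
        "application/json", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    out := map[string]any{}
    _ = json.NewDecoder(resp.Body).Decode(&out)
    // The response carries a job ID that the tests poll at /models/jobs/<id>.
    fmt.Println(out)
}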

View File

@@ -135,7 +135,7 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applicati
}
// Validate the configuration
if valid, _ := req.Validate(); !valid {
if !req.Validate() {
response := ModelResponse{
Success: false,
Error: "Validation failed",
@@ -196,7 +196,7 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applicati
func ReloadModelsEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
return func(c echo.Context) error {
// Reload configurations
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath); err != nil {
response := ModelResponse{
Success: false,
Error: "Failed to reload configurations: " + err.Error(),

View File

@@ -145,10 +145,10 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
}
// Set defaults
modelConfig.SetDefaults(appConfig.ToConfigLoaderOptions()...)
modelConfig.SetDefaults()
// Validate the configuration
if valid, _ := modelConfig.Validate(); !valid {
if !modelConfig.Validate() {
response := ModelResponse{
Success: false,
Error: "Invalid configuration",
@@ -185,7 +185,7 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
return c.JSON(http.StatusInternalServerError, response)
}
// Reload configurations
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath); err != nil {
response := ModelResponse{
Success: false,
Error: "Failed to reload configurations: " + err.Error(),

View File

@@ -5,7 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net"
"strings"
"time"
"github.com/labstack/echo/v4"
@@ -105,10 +105,7 @@ func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval
fragment = fragment.AddMessage(message.Role, message.StringContent)
}
_, port, err := net.SplitHostPort(appConfig.APIAddress)
if err != nil {
return err
}
port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:]
apiKey := ""
if len(appConfig.ApiKeys) > 0 {
apiKey = appConfig.ApiKeys[0]
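
The two ways of deriving the API port that appear in this hunk behave the same for a plain host:port address but diverge when the address is malformed; a quick side-by-side comparison:

package main

import (
    "fmt"
    "net"
    "strings"
)

func main() {
    addr := "127.0.0.1:8080"

    // net.SplitHostPort validates the address and handles IPv6 literals.
    if _, port, err := net.SplitHostPort(addr); err == nil {
        fmt.Println("SplitHostPort:", port) // 8080
    }

    // The LastIndex slice is a best-effort cut after the final colon.
    fmt.Println("LastIndex:", addr[strings.LastIndex(addr, ":")+1:]) // 8080

    // Without a port, SplitHostPort reports an error while the slice
    // silently returns the whole string.
    noPort := "127.0.0.1"
    if _, _, err := net.SplitHostPort(noPort); err != nil {
        fmt.Println("SplitHostPort error:", err)
    }
    fmt.Println("LastIndex fallback:", noPort[strings.LastIndex(noPort, ":")+1:])
}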

View File

@@ -1,340 +0,0 @@
package localai
import (
"encoding/json"
"io"
"net/http"
"os"
"path/filepath"
"time"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/p2p"
"github.com/rs/zerolog/log"
)
type SettingsResponse struct {
Success bool `json:"success"`
Error string `json:"error,omitempty"`
Message string `json:"message,omitempty"`
}
type RuntimeSettings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"`
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
Threads *int `json:"threads,omitempty"`
ContextSize *int `json:"context_size,omitempty"`
F16 *bool `json:"f16,omitempty"`
Debug *bool `json:"debug,omitempty"`
CORS *bool `json:"cors,omitempty"`
CSRF *bool `json:"csrf,omitempty"`
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
P2PToken *string `json:"p2p_token,omitempty"`
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
Federated *bool `json:"federated,omitempty"`
Galleries *[]config.Gallery `json:"galleries,omitempty"`
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys
}
// GetSettingsEndpoint returns current settings with precedence (env > file > defaults)
func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
appConfig := app.ApplicationConfig()
startupConfig := app.StartupConfig()
if startupConfig == nil {
// Fallback if startup config not available
startupConfig = appConfig
}
settings := RuntimeSettings{}
// Set all current values (using pointers for RuntimeSettings)
watchdogIdle := appConfig.WatchDogIdle
watchdogBusy := appConfig.WatchDogBusy
watchdogEnabled := appConfig.WatchDog
singleBackend := appConfig.SingleBackend
parallelBackendRequests := appConfig.ParallelBackendRequests
threads := appConfig.Threads
contextSize := appConfig.ContextSize
f16 := appConfig.F16
debug := appConfig.Debug
cors := appConfig.CORS
csrf := appConfig.CSRF
corsAllowOrigins := appConfig.CORSAllowOrigins
p2pToken := appConfig.P2PToken
p2pNetworkID := appConfig.P2PNetworkID
federated := appConfig.Federated
galleries := appConfig.Galleries
backendGalleries := appConfig.BackendGalleries
autoloadGalleries := appConfig.AutoloadGalleries
autoloadBackendGalleries := appConfig.AutoloadBackendGalleries
apiKeys := appConfig.ApiKeys
settings.WatchdogIdleEnabled = &watchdogIdle
settings.WatchdogBusyEnabled = &watchdogBusy
settings.WatchdogEnabled = &watchdogEnabled
settings.SingleBackend = &singleBackend
settings.ParallelBackendRequests = &parallelBackendRequests
settings.Threads = &threads
settings.ContextSize = &contextSize
settings.F16 = &f16
settings.Debug = &debug
settings.CORS = &cors
settings.CSRF = &csrf
settings.CORSAllowOrigins = &corsAllowOrigins
settings.P2PToken = &p2pToken
settings.P2PNetworkID = &p2pNetworkID
settings.Federated = &federated
settings.Galleries = &galleries
settings.BackendGalleries = &backendGalleries
settings.AutoloadGalleries = &autoloadGalleries
settings.AutoloadBackendGalleries = &autoloadBackendGalleries
settings.ApiKeys = &apiKeys
var idleTimeout, busyTimeout string
if appConfig.WatchDogIdleTimeout > 0 {
idleTimeout = appConfig.WatchDogIdleTimeout.String()
} else {
idleTimeout = "15m" // default
}
if appConfig.WatchDogBusyTimeout > 0 {
busyTimeout = appConfig.WatchDogBusyTimeout.String()
} else {
busyTimeout = "5m" // default
}
settings.WatchdogIdleTimeout = &idleTimeout
settings.WatchdogBusyTimeout = &busyTimeout
return c.JSON(http.StatusOK, settings)
}
}
// UpdateSettingsEndpoint updates settings, saves to file, and applies immediately
func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
appConfig := app.ApplicationConfig()
startupConfig := app.StartupConfig()
if startupConfig == nil {
// Fallback if startup config not available
startupConfig = appConfig
}
body, err := io.ReadAll(c.Request().Body)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "Failed to read request body: " + err.Error(),
})
}
var settings RuntimeSettings
if err := json.Unmarshal(body, &settings); err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "Failed to parse JSON: " + err.Error(),
})
}
// Validate timeouts if provided
if settings.WatchdogIdleTimeout != nil {
_, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "Invalid watchdog_idle_timeout format: " + err.Error(),
})
}
}
if settings.WatchdogBusyTimeout != nil {
_, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "Invalid watchdog_busy_timeout format: " + err.Error(),
})
}
}
// Save to file
if appConfig.DynamicConfigsDir == "" {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "DynamicConfigsDir is not set",
})
}
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
settingsJSON, err := json.MarshalIndent(settings, "", " ")
if err != nil {
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Failed to marshal settings: " + err.Error(),
})
}
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Failed to write settings file: " + err.Error(),
})
}
// Apply settings immediately, checking env var overrides per field
watchdogChanged := false
if settings.WatchdogEnabled != nil {
appConfig.WatchDog = *settings.WatchdogEnabled
watchdogChanged = true
}
if settings.WatchdogIdleEnabled != nil {
appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
if appConfig.WatchDogIdle {
appConfig.WatchDog = true
}
watchdogChanged = true
}
if settings.WatchdogBusyEnabled != nil {
appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
if appConfig.WatchDogBusy {
appConfig.WatchDog = true
}
watchdogChanged = true
}
if settings.WatchdogIdleTimeout != nil {
dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout)
appConfig.WatchDogIdleTimeout = dur
watchdogChanged = true
}
if settings.WatchdogBusyTimeout != nil {
dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout)
appConfig.WatchDogBusyTimeout = dur
watchdogChanged = true
}
if settings.SingleBackend != nil {
appConfig.SingleBackend = *settings.SingleBackend
}
if settings.ParallelBackendRequests != nil {
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
}
if settings.Threads != nil {
appConfig.Threads = *settings.Threads
}
if settings.ContextSize != nil {
appConfig.ContextSize = *settings.ContextSize
}
if settings.F16 != nil {
appConfig.F16 = *settings.F16
}
if settings.Debug != nil {
appConfig.Debug = *settings.Debug
}
if settings.CORS != nil {
appConfig.CORS = *settings.CORS
}
if settings.CSRF != nil {
appConfig.CSRF = *settings.CSRF
}
if settings.CORSAllowOrigins != nil {
appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
}
if settings.P2PToken != nil {
appConfig.P2PToken = *settings.P2PToken
}
if settings.P2PNetworkID != nil {
appConfig.P2PNetworkID = *settings.P2PNetworkID
}
if settings.Federated != nil {
appConfig.Federated = *settings.Federated
}
if settings.Galleries != nil {
appConfig.Galleries = *settings.Galleries
}
if settings.BackendGalleries != nil {
appConfig.BackendGalleries = *settings.BackendGalleries
}
if settings.AutoloadGalleries != nil {
appConfig.AutoloadGalleries = *settings.AutoloadGalleries
}
if settings.AutoloadBackendGalleries != nil {
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
}
if settings.ApiKeys != nil {
// API keys from env vars (startup) should be kept, runtime settings keys are added
// Combine startup keys (env vars) with runtime settings keys
envKeys := startupConfig.ApiKeys
runtimeKeys := *settings.ApiKeys
// Merge: env keys first (they take precedence), then runtime keys
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
// Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication
// The runtime_settings.json is the unified config file. If api_keys.json exists,
// it will be loaded first, but runtime_settings.json takes precedence and deduplicates.
}
// Restart watchdog if settings changed
if watchdogChanged {
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil {
if err := app.StopWatchdog(); err != nil {
log.Error().Err(err).Msg("Failed to stop watchdog")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop watchdog: " + err.Error(),
})
}
} else {
if err := app.RestartWatchdog(); err != nil {
log.Error().Err(err).Msg("Failed to restart watchdog")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Settings saved but failed to restart watchdog: " + err.Error(),
})
}
}
}
// Restart P2P if P2P settings changed
p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil
if p2pChanged {
if settings.P2PToken != nil && *settings.P2PToken == "" {
// stop P2P
if err := app.StopP2P(); err != nil {
log.Error().Err(err).Msg("Failed to stop P2P")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop P2P: " + err.Error(),
})
}
} else {
if settings.P2PToken != nil && *settings.P2PToken == "0" {
// generate a token if the user sets 0 (disabled)
token := p2p.GenerateToken(60, 60)
settings.P2PToken = &token
appConfig.P2PToken = token
}
// Stop existing P2P
if err := app.RestartP2P(); err != nil {
log.Error().Err(err).Msg("Failed to stop P2P")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop P2P: " + err.Error(),
})
}
}
}
return c.JSON(http.StatusOK, SettingsResponse{
Success: true,
Message: "Settings updated successfully",
})
}
}
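
For a concrete picture of how the runtime settings above are consumed, here is a minimal client sketch against the /api/settings route registered further below. The host, port, and chosen values are illustrative only; timeouts are Go duration strings validated with time.ParseDuration on the server side:

package main

import (
    "bytes"
    "fmt"
    "net/http"
)

func main() {
    // Field names match the json tags of RuntimeSettings above.
    body := []byte(`{
      "watchdog_enabled": true,
      "watchdog_idle_enabled": true,
      "watchdog_idle_timeout": "20m"
    }`)

    resp, err := http.Post("http://127.0.0.1:8080/api/settings",
        "application/json", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println("status:", resp.Status)
}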

View File

@@ -43,18 +43,17 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
processingModels, taskTypes := opcache.GetStatus()
summary := map[string]interface{}{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"BaseURL": middleware.BaseURL(c),
"Models": modelsWithoutConfig,
"ModelsConfig": modelConfigs,
"GalleryConfig": galleryConfigs,
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,
"LoadedModels": loadedModelsMap,
"InstalledBackends": installedBackends,
"DisableRuntimeSettings": appConfig.DisableRuntimeSettings,
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"BaseURL": middleware.BaseURL(c),
"Models": modelsWithoutConfig,
"ModelsConfig": modelConfigs,
"GalleryConfig": galleryConfigs,
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,
"LoadedModels": loadedModelsMap,
"InstalledBackends": installedBackends,
}
contentType := c.Request().Header.Get("Content-Type")

View File

@@ -1,268 +0,0 @@
package openai
import (
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"time"
"github.com/google/uuid"
"github.com/labstack/echo/v4"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
model "github.com/mudler/LocalAI/pkg/model"
)
// InpaintingEndpoint handles POST /v1/images/inpainting
//
// Swagger / OpenAPI docstring (swaggo):
// @Summary Image inpainting
// @Description Perform image inpainting. Accepts multipart/form-data with `image` and `mask` files.
// @Tags images
// @Accept multipart/form-data
// @Produce application/json
// @Param model formData string true "Model identifier"
// @Param prompt formData string true "Text prompt guiding the generation"
// @Param steps formData int false "Number of inference steps (default 25)"
// @Param image formData file true "Original image file"
// @Param mask formData file true "Mask image file (white = area to inpaint)"
// @Success 200 {object} schema.OpenAIResponse
// @Failure 400 {object} map[string]string
// @Failure 500 {object} map[string]string
// @Router /v1/images/inpainting [post]
func InpaintingEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
return func(c echo.Context) error {
// Parse basic form values
modelName := c.FormValue("model")
prompt := c.FormValue("prompt")
stepsStr := c.FormValue("steps")
if modelName == "" || prompt == "" {
log.Error().Msg("Inpainting Endpoint - missing model or prompt")
return echo.ErrBadRequest
}
// steps default
steps := 25
if stepsStr != "" {
if v, err := strconv.Atoi(stepsStr); err == nil {
steps = v
}
}
// Get uploaded files
imageFile, err := c.FormFile("image")
if err != nil {
log.Error().Err(err).Msg("Inpainting Endpoint - missing image file")
return echo.NewHTTPError(http.StatusBadRequest, "missing image file")
}
maskFile, err := c.FormFile("mask")
if err != nil {
log.Error().Err(err).Msg("Inpainting Endpoint - missing mask file")
return echo.NewHTTPError(http.StatusBadRequest, "missing mask file")
}
// Read files into memory (small files expected)
imgSrc, err := imageFile.Open()
if err != nil {
return err
}
defer imgSrc.Close()
imgBytes, err := io.ReadAll(imgSrc)
if err != nil {
return err
}
maskSrc, err := maskFile.Open()
if err != nil {
return err
}
defer maskSrc.Close()
maskBytes, err := io.ReadAll(maskSrc)
if err != nil {
return err
}
// Create JSON with base64 fields expected by backend
b64Image := base64.StdEncoding.EncodeToString(imgBytes)
b64Mask := base64.StdEncoding.EncodeToString(maskBytes)
// get model config from context (middleware set it)
cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
if !ok || cfg == nil {
log.Error().Msg("Inpainting Endpoint - model config not found in context")
return echo.ErrBadRequest
}
// Use the GeneratedContentDir so the generated PNG is placed where the
// HTTP static handler serves `/generated-images`.
tmpDir := appConfig.GeneratedContentDir
// Ensure the directory exists
if err := os.MkdirAll(tmpDir, 0750); err != nil {
log.Error().Err(err).Msgf("Inpainting Endpoint - failed to create generated content dir: %s", tmpDir)
return echo.NewHTTPError(http.StatusInternalServerError, "failed to prepare storage")
}
id := uuid.New().String()
jsonPath := filepath.Join(tmpDir, fmt.Sprintf("inpaint_%s.json", id))
jsonFile := map[string]string{
"image": b64Image,
"mask_image": b64Mask,
}
jf, err := os.CreateTemp(tmpDir, "inpaint_")
if err != nil {
return err
}
// setup cleanup on error; if everything succeeds we set success = true
success := false
var dst string
var origRef string
var maskRef string
defer func() {
if !success {
// Best-effort cleanup; log any failures
if jf != nil {
if cerr := jf.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file in cleanup")
}
if name := jf.Name(); name != "" {
if rerr := os.Remove(name); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove temp json file %s in cleanup", name)
}
}
}
if jsonPath != "" {
if rerr := os.Remove(jsonPath); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove json file %s in cleanup", jsonPath)
}
}
if dst != "" {
if rerr := os.Remove(dst); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove dst file %s in cleanup", dst)
}
}
if origRef != "" {
if rerr := os.Remove(origRef); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove orig ref file %s in cleanup", origRef)
}
}
if maskRef != "" {
if rerr := os.Remove(maskRef); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove mask ref file %s in cleanup", maskRef)
}
}
}
}()
// write original image and mask to disk as ref images so backends that
// accept reference image files can use them (maintainer request).
origTmp, err := os.CreateTemp(tmpDir, "refimg_")
if err != nil {
return err
}
if _, err := origTmp.Write(imgBytes); err != nil {
_ = origTmp.Close()
_ = os.Remove(origTmp.Name())
return err
}
if cerr := origTmp.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close orig temp file")
}
origRef = origTmp.Name()
maskTmp, err := os.CreateTemp(tmpDir, "refmask_")
if err != nil {
// cleanup origTmp on error
_ = os.Remove(origRef)
return err
}
if _, err := maskTmp.Write(maskBytes); err != nil {
_ = maskTmp.Close()
_ = os.Remove(maskTmp.Name())
_ = os.Remove(origRef)
return err
}
if cerr := maskTmp.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close mask temp file")
}
maskRef = maskTmp.Name()
// write JSON
enc := json.NewEncoder(jf)
if err := enc.Encode(jsonFile); err != nil {
if cerr := jf.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file after encode error")
}
return err
}
if cerr := jf.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file")
}
// rename to desired name
if err := os.Rename(jf.Name(), jsonPath); err != nil {
return err
}
// prepare dst
outTmp, err := os.CreateTemp(tmpDir, "out_")
if err != nil {
return err
}
if cerr := outTmp.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close out temp file")
}
dst = outTmp.Name() + ".png"
if err := os.Rename(outTmp.Name(), dst); err != nil {
return err
}
// Determine width/height default
width := 512
height := 512
// Call backend image generation via indirection so tests can stub it
// Note: ImageGenerationFunc will call into the loaded model's GenerateImage which expects src JSON
// Also pass ref images (orig + mask) so backends that support ref images can use them.
refImages := []string{origRef, maskRef}
fn, err := backend.ImageGenerationFunc(height, width, 0, steps, 0, prompt, "", jsonPath, dst, ml, *cfg, appConfig, refImages)
if err != nil {
return err
}
// Execute generation function (blocking)
if err := fn(); err != nil {
return err
}
// On success, build response URL using BaseURL middleware helper and
// the same `generated-images` prefix used by the server static mount.
baseURL := middleware.BaseURL(c)
// Build response using url.JoinPath for correct URL escaping
imgPath, err := url.JoinPath(baseURL, "generated-images", filepath.Base(dst))
if err != nil {
return err
}
created := int(time.Now().Unix())
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Data: []schema.Item{{
URL: imgPath,
}},
}
// mark success so defer cleanup will not remove output files
success = true
return c.JSON(http.StatusOK, resp)
}
}
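
A client-side counterpart to the handler above (and to the httptest exercise that follows) would post a multipart form carrying the model, prompt, optional steps, and the image/mask files. File paths and the model name below are placeholders:

package main

import (
    "bytes"
    "fmt"
    "mime/multipart"
    "net/http"
    "os"
)

func main() {
    var buf bytes.Buffer
    w := multipart.NewWriter(&buf)
    _ = w.WriteField("model", "dreamshaper-8-inpainting")
    _ = w.WriteField("prompt", "a red sofa in a bright room")
    _ = w.WriteField("steps", "25")

    // Attach the original image and the mask (white = area to inpaint).
    for field, path := range map[string]string{
        "image": "input.png",
        "mask":  "mask.png",
    } {
        fw, _ := w.CreateFormFile(field, path)
        data, err := os.ReadFile(path)
        if err != nil {
            panic(err)
        }
        _, _ = fw.Write(data)
    }
    _ = w.Close()

    resp, err := http.Post("http://127.0.0.1:8080/v1/images/inpainting",
        w.FormDataContentType(), &buf)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println("status:", resp.Status) // JSON body carries a generated-images URL
}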

View File

@@ -1,107 +0,0 @@
package openai
import (
"bytes"
"mime/multipart"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/stretchr/testify/require"
)
func makeMultipartRequest(t *testing.T, fields map[string]string, files map[string][]byte) (*http.Request, string) {
b := &bytes.Buffer{}
w := multipart.NewWriter(b)
for k, v := range fields {
_ = w.WriteField(k, v)
}
for fname, content := range files {
fw, err := w.CreateFormFile(fname, fname+".png")
require.NoError(t, err)
_, err = fw.Write(content)
require.NoError(t, err)
}
require.NoError(t, w.Close())
req := httptest.NewRequest(http.MethodPost, "/v1/images/inpainting", b)
req.Header.Set("Content-Type", w.FormDataContentType())
return req, w.FormDataContentType()
}
func TestInpainting_MissingFiles(t *testing.T) {
e := echo.New()
// handler requires cl, ml, appConfig but this test verifies missing files early
h := InpaintingEndpoint(nil, nil, config.NewApplicationConfig())
req := httptest.NewRequest(http.MethodPost, "/v1/images/inpainting", nil)
rec := httptest.NewRecorder()
c := e.NewContext(req, rec)
err := h(c)
require.Error(t, err)
}
func TestInpainting_HappyPath(t *testing.T) {
// Setup temp generated content dir
tmpDir, err := os.MkdirTemp("", "gencontent")
require.NoError(t, err)
defer os.RemoveAll(tmpDir)
appConf := config.NewApplicationConfig(config.WithGeneratedContentDir(tmpDir))
// stub the backend.ImageGenerationFunc
orig := backend.ImageGenerationFunc
backend.ImageGenerationFunc = func(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig, refImages []string) (func() error, error) {
fn := func() error {
// write a fake png file to dst
return os.WriteFile(dst, []byte("PNGDATA"), 0644)
}
return fn, nil
}
defer func() { backend.ImageGenerationFunc = orig }()
// prepare multipart request with image and mask
fields := map[string]string{"model": "dreamshaper-8-inpainting", "prompt": "A test"}
files := map[string][]byte{"image": []byte("IMAGEDATA"), "mask": []byte("MASKDATA")}
reqBuf, _ := makeMultipartRequest(t, fields, files)
rec := httptest.NewRecorder()
e := echo.New()
c := e.NewContext(reqBuf, rec)
// set a minimal model config in context as handler expects
c.Set(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG, &config.ModelConfig{Backend: "diffusers"})
h := InpaintingEndpoint(nil, nil, appConf)
// call handler
err = h(c)
require.NoError(t, err)
require.Equal(t, http.StatusOK, rec.Code)
// verify response body contains generated-images path
body := rec.Body.String()
require.Contains(t, body, "generated-images")
// confirm the file was created in tmpDir
// parse out filename from response (naive search)
// find "generated-images/" and extract until closing quote or brace
idx := bytes.Index(rec.Body.Bytes(), []byte("generated-images/"))
require.True(t, idx >= 0)
rest := rec.Body.Bytes()[idx:]
end := bytes.IndexAny(rest, "\",}\n")
if end == -1 {
end = len(rest)
}
fname := string(rest[len("generated-images/"):end])
// ensure file exists
_, err = os.Stat(filepath.Join(tmpDir, fname))
require.NoError(t, err)
}

View File

@@ -5,7 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net"
"strings"
"time"
"github.com/labstack/echo/v4"
@@ -75,11 +75,7 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
fragment = fragment.AddMessage(message.Role, message.StringContent)
}
_, port, err := net.SplitHostPort(appConfig.APIAddress)
if err != nil {
return err
}
port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:]
apiKey := ""
if appConfig.ApiKeys != nil {
apiKey = appConfig.ApiKeys[0]
@@ -108,11 +104,11 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
log.Debug().Msgf("[model agent] [model: %s] Reasoning: %s", config.Name, s)
}),
cogito.WithToolCallBack(func(t *cogito.ToolChoice) bool {
log.Debug().Msgf("[model agent] [model: %s] Tool call: %s, reasoning: %s, arguments: %+v", config.Name, t.Name, t.Reasoning, t.Arguments)
log.Debug().Msgf("[model agent] [model: %s] Tool call: %s, reasoning: %s, arguments: %+v", t.Name, t.Reasoning, t.Arguments)
return true
}),
cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
log.Debug().Msgf("[model agent] [model: %s] Tool call result: %s, result: %s, tool arguments: %+v", config.Name, t.Name, t.Result, t.ToolArguments)
log.Debug().Msgf("[model agent] [model: %s] Tool call result: %s, tool arguments: %+v", t.Name, t.Result, t.ToolArguments)
}),
)

View File

@@ -112,7 +112,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgVAD.Validate(); !valid {
if !cfgVAD.Validate() {
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -128,7 +128,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgSST.Validate(); !valid {
if !cfgSST.Validate() {
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -155,7 +155,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgVAD.Validate(); !valid {
if !cfgVAD.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -172,7 +172,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgSST.Validate(); !valid {
if !cfgSST.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -191,7 +191,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgAnyToAny.Validate(); !valid {
if !cfgAnyToAny.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -218,7 +218,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgLLM.Validate(); !valid {
if !cfgLLM.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -228,7 +228,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgTTS.Validate(); !valid {
if !cfgTTS.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
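
The recurring switch in these hunks between a Validate that returns a bare bool and one that returns (bool, error) is easiest to see side by side. A toy illustration with stand-in types, not the real config structs:

package main

import (
    "errors"
    "fmt"
)

// boolConfig only reports whether it is valid.
type boolConfig struct{ Backend string }

func (c boolConfig) Validate() bool { return c.Backend != "" }

// errConfig also reports why validation failed.
type errConfig struct{ Backend string }

func (c errConfig) Validate() (bool, error) {
    if c.Backend == "" {
        return false, errors.New("backend must be set")
    }
    return true, nil
}

func main() {
    if !(boolConfig{}).Validate() {
        fmt.Println("invalid (no reason available)")
    }
    if valid, err := (errConfig{}).Validate(); !valid {
        fmt.Println("invalid:", err)
    }
}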

View File

@@ -55,11 +55,6 @@ func (re *RequestExtractor) setModelNameFromRequest(c echo.Context) {
model = c.QueryParam("model")
}
// Check FormValue for multipart/form-data requests (e.g., /v1/images/inpainting)
if model == "" {
model = c.FormValue("model")
}
if model == "" {
// Set model from bearer token, if available
auth := c.Request().Header.Get("Authorization")
@@ -480,7 +475,7 @@ func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.
}
}
if valid, _ := config.Validate(); valid {
if config.Validate() {
return nil
}
return fmt.Errorf("unable to validate configuration after merging")

View File

@@ -140,8 +140,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
// images
imageHandler := openai.ImageEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
imageMiddleware := []echo.MiddlewareFunc{
// Default: use the first available image generation model
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_IMAGE)),
re.BuildConstantDefaultModelNameMiddleware("stablediffusion"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
@@ -156,11 +155,6 @@ func RegisterOpenAIRoutes(app *echo.Echo,
app.POST("/v1/images/generations", imageHandler, imageMiddleware...)
app.POST("/images/generations", imageHandler, imageMiddleware...)
// inpainting endpoint (image + mask) - reuse same middleware config as images
inpaintingHandler := openai.InpaintingEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
app.POST("/v1/images/inpainting", inpaintingHandler, imageMiddleware...)
app.POST("/images/inpainting", inpaintingHandler, imageMiddleware...)
// videos (OpenAI-compatible endpoints mapped to LocalAI video handler)
videoHandler := openai.VideoEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
videoMiddleware := []echo.MiddlewareFunc{

View File

@@ -23,17 +23,6 @@ func RegisterUIRoutes(app *echo.Echo,
app.GET("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
app.GET("/manage", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
if !appConfig.DisableRuntimeSettings {
// Settings page
app.GET("/settings", func(c echo.Context) error {
summary := map[string]interface{}{
"Title": "LocalAI - Settings",
"BaseURL": middleware.BaseURL(c),
}
return c.Render(200, "views/settings", summary)
})
}
// P2P
app.GET("/p2p/", func(c echo.Context) error {
summary := map[string]interface{}{

View File

@@ -12,10 +12,8 @@ import (
"github.com/google/uuid"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
@@ -23,7 +21,7 @@ import (
)
// RegisterUIAPIRoutes registers JSON API routes for the web UI
func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache, applicationInstance *application.Application) {
func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) {
// Operations API - Get all current operations (models + backends)
app.GET("/api/operations", func(c echo.Context) error {
@@ -266,17 +264,17 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
installedModelsCount := len(modelConfigs) + len(modelsWithoutConfig)
return c.JSON(200, map[string]interface{}{
"models": modelsJSON,
"repositories": appConfig.Galleries,
"allTags": tags,
"processingModels": processingModelsData,
"taskTypes": taskTypes,
"availableModels": totalModels,
"installedModels": installedModelsCount,
"currentPage": pageNum,
"totalPages": totalPages,
"prevPage": prevPage,
"nextPage": nextPage,
"models": modelsJSON,
"repositories": appConfig.Galleries,
"allTags": tags,
"processingModels": processingModelsData,
"taskTypes": taskTypes,
"availableModels": totalModels,
"installedModels": installedModelsCount,
"currentPage": pageNum,
"totalPages": totalPages,
"prevPage": prevPage,
"nextPage": nextPage,
})
})
@@ -804,10 +802,4 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
},
})
})
if !appConfig.DisableRuntimeSettings {
// Settings API
app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance))
app.POST("/api/settings", localai.UpdateSettingsEndpoint(applicationInstance))
}
}

View File

File diff suppressed because it is too large.

View File

@@ -629,33 +629,11 @@ function backendsGallery() {
this.fetchBackends();
}
if (jobData.error || (jobData.message && jobData.message.startsWith('error:'))) {
if (jobData.error) {
backend.processing = false;
delete this.jobProgress[backend.jobID];
const action = backend.isDeletion ? 'deleting' : 'installing';
// Extract error message - handle both string and object errors
let errorMessage = 'Unknown error';
if (typeof jobData.error === 'string') {
errorMessage = jobData.error;
} else if (jobData.error && typeof jobData.error === 'object') {
// Check if error object has any properties
const errorKeys = Object.keys(jobData.error);
if (errorKeys.length > 0) {
// Try common error object properties
errorMessage = jobData.error.message || jobData.error.error || jobData.error.Error || JSON.stringify(jobData.error);
} else {
// Empty object {}, fall back to message field
errorMessage = jobData.message || 'Unknown error';
}
} else if (jobData.message) {
// Use message field if error is not present or is empty
errorMessage = jobData.message;
}
// Remove "error: " prefix if present
if (errorMessage.startsWith('error: ')) {
errorMessage = errorMessage.substring(7);
}
this.addNotification(`Error ${action} backend "${backend.name}": ${errorMessage}`, 'error');
this.addNotification(`Error ${action} backend "${backend.name}": ${jobData.error}`, 'error');
}
} catch (error) {
console.error('Error polling job:', error);

View File

File diff suppressed because it is too large.

View File

@@ -127,10 +127,6 @@
imageFiles: [],
audioFiles: [],
textFiles: [],
attachedFiles: [],
mcpMode: false,
mcpAvailable: false,
mcpModels: {},
currentPlaceholder: 'Send a message...',
placeholderIndex: 0,
charIndex: 0,
@@ -166,8 +162,6 @@
init() {
window.currentPlaceholderText = this.currentPlaceholder;
this.startTypingAnimation();
// Build MCP models map from data attributes
this.buildMCPModelsMap();
// Select first model by default
this.$nextTick(() => {
const select = this.$el.querySelector('select');
@@ -176,43 +170,9 @@
const firstModelOption = select.options[1];
if (firstModelOption && firstModelOption.value) {
this.selectedModel = firstModelOption.value;
this.checkMCPAvailability();
}
}
});
// Watch for changes to selectedModel to update MCP availability
this.$watch('selectedModel', () => {
this.checkMCPAvailability();
});
},
buildMCPModelsMap() {
const select = this.$el.querySelector('select');
if (!select) return;
this.mcpModels = {};
for (let i = 0; i < select.options.length; i++) {
const option = select.options[i];
if (option.value) {
const hasMcpAttr = option.getAttribute('data-has-mcp');
this.mcpModels[option.value] = hasMcpAttr === 'true';
}
}
// Debug: uncomment to see the MCP models map
// console.log('MCP Models Map:', this.mcpModels);
},
checkMCPAvailability() {
if (!this.selectedModel) {
this.mcpAvailable = false;
this.mcpMode = false;
return;
}
// Check MCP availability from the map
const hasMCP = this.mcpModels[this.selectedModel] === true;
this.mcpAvailable = hasMCP;
// Debug: uncomment to see what's happening
// console.log('MCP Check:', { model: this.selectedModel, hasMCP, mcpAvailable: this.mcpAvailable, map: this.mcpModels });
if (!hasMCP) {
this.mcpMode = false;
}
},
startTypingAnimation() {
if (this.isTyping) return;
@@ -281,98 +241,30 @@
} else {
this.resumeTyping();
}
},
handleFileSelection(files, fileType) {
Array.from(files).forEach(file => {
// Check if file already exists
const exists = this.attachedFiles.some(f => f.name === file.name && f.type === fileType);
if (!exists) {
this.attachedFiles.push({ name: file.name, type: fileType });
}
});
},
removeAttachedFile(fileType, fileName) {
// Remove from attachedFiles array
const index = this.attachedFiles.findIndex(f => f.name === fileName && f.type === fileType);
if (index !== -1) {
this.attachedFiles.splice(index, 1);
}
// Remove from corresponding file array
if (fileType === 'image') {
this.imageFiles = this.imageFiles.filter(f => f.name !== fileName);
} else if (fileType === 'audio') {
this.audioFiles = this.audioFiles.filter(f => f.name !== fileName);
} else if (fileType === 'file') {
this.textFiles = this.textFiles.filter(f => f.name !== fileName);
}
}
}">
<!-- Model Selector with MCP Toggle -->
<!-- Model Selector -->
<div class="mb-4">
<label class="block text-sm font-medium text-[#94A3B8] mb-2">Select Model</label>
<div class="flex items-center gap-3">
<select
x-model="selectedModel"
@change="$nextTick(() => checkMCPAvailability())"
class="flex-1 bg-[#1E293B] text-[#E5E7EB] border border-[#38BDF8]/20 focus:border-[#38BDF8] focus:ring-2 focus:ring-[#38BDF8]/50 rounded-lg p-3 appearance-none"
required
>
<option value="" disabled class="text-[#94A3B8]">Select a model to chat with...</option>
{{ range .ModelsConfig }}
{{ $cfg := . }}
{{ $hasMCP := or (ne $cfg.MCP.Servers "") (ne $cfg.MCP.Stdio "") }}
{{ range .KnownUsecaseStrings }}
{{ if eq . "FLAG_CHAT" }}
<option value="{{$cfg.Name}}" data-has-mcp="{{if $hasMCP}}true{{else}}false{{end}}" class="bg-[#1E293B] text-[#E5E7EB]">{{$cfg.Name}}</option>
{{ end }}
<select
x-model="selectedModel"
class="w-full bg-[#1E293B] text-[#E5E7EB] border border-[#38BDF8]/20 focus:border-[#38BDF8] focus:ring-2 focus:ring-[#38BDF8]/50 rounded-lg p-3 appearance-none"
required
>
<option value="" disabled class="text-[#94A3B8]">Select a model to chat with...</option>
{{ range .ModelsConfig }}
{{ $cfg := . }}
{{ range .KnownUsecaseStrings }}
{{ if eq . "FLAG_CHAT" }}
<option value="{{$cfg.Name}}" class="bg-[#1E293B] text-[#E5E7EB]">{{$cfg.Name}}</option>
{{ end }}
{{ end }}
</select>
<!-- Compact MCP Toggle - Show only if MCP is available for selected model -->
<div
x-show="mcpAvailable"
class="flex items-center gap-2 px-3 py-2 text-xs rounded text-[#E5E7EB] bg-[#1E293B] border border-[#38BDF8]/20 whitespace-nowrap">
<i class="fa-solid fa-plug text-[#38BDF8] text-sm"></i>
<span class="text-[#94A3B8]">MCP</span>
<label class="relative inline-flex items-center cursor-pointer ml-1">
<input type="checkbox" id="index_mcp_toggle" class="sr-only peer" x-model="mcpMode">
<div class="w-9 h-5 bg-[#101827] peer-focus:outline-none peer-focus:ring-2 peer-focus:ring-[#38BDF8]/30 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-[#1E293B] after:border after:rounded-full after:h-4 after:w-4 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
</div>
<!-- MCP Mode Notification - Compact tooltip style -->
<div
x-show="mcpMode && mcpAvailable"
class="mt-2 p-2 bg-[#38BDF8]/10 border border-[#38BDF8]/30 rounded text-[#94A3B8] text-xs">
<div class="flex items-start space-x-2">
<i class="fa-solid fa-info-circle text-[#38BDF8] mt-0.5 text-xs"></i>
<p class="text-[#94A3B8]">Non-streaming mode active. Responses may take longer to process.</p>
</div>
</div>
{{ end }}
{{ end }}
</select>
</div>
<!-- Input Bar -->
<form @submit.prevent="startChat($event)" class="relative w-full">
<!-- Attachment Tags - Show above input when files are attached -->
<div x-show="attachedFiles.length > 0" class="mb-3 flex flex-wrap gap-2 items-center">
<template x-for="(file, index) in attachedFiles" :key="index">
<div class="inline-flex items-center gap-2 px-3 py-1.5 rounded-lg text-sm bg-[#38BDF8]/20 border border-[#38BDF8]/40 text-[#E5E7EB]">
<i :class="file.type === 'image' ? 'fa-solid fa-image' : file.type === 'audio' ? 'fa-solid fa-microphone' : 'fa-solid fa-file'" class="text-[#38BDF8]"></i>
<span x-text="file.name" class="max-w-[200px] truncate"></span>
<button
type="button"
@click="attachedFiles.splice(index, 1); removeAttachedFile(file.type, file.name)"
class="ml-1 text-[#94A3B8] hover:text-[#E5E7EB] transition-colors"
title="Remove attachment"
>
<i class="fa-solid fa-times text-xs"></i>
</button>
</div>
</template>
</div>
<div class="relative w-full bg-[#1E293B] border border-[#38BDF8]/20 rounded-xl focus-within:ring-2 focus-within:ring-[#38BDF8]/50 focus-within:border-[#38BDF8] transition-all duration-200">
<textarea
x-model="inputValue"
@@ -387,6 +279,7 @@
@input="handleInput()"
rows="2"
></textarea>
<span x-show="fileName" x-text="fileName" class="absolute right-16 top-3 text-[#94A3B8] text-xs mr-2"></span>
<!-- Attachment Buttons -->
<button
@@ -428,7 +321,7 @@
multiple
accept="image/*"
style="display: none;"
@change="imageFiles = Array.from($event.target.files); handleFileSelection($event.target.files, 'image')"
@change="imageFiles = Array.from($event.target.files); fileName = imageFiles.length > 0 ? imageFiles.length + ' image(s) selected' : ''"
/>
<input
id="index_input_audio"
@@ -436,7 +329,7 @@
multiple
accept="audio/*"
style="display: none;"
@change="audioFiles = Array.from($event.target.files); handleFileSelection($event.target.files, 'audio')"
@change="audioFiles = Array.from($event.target.files); fileName = audioFiles.length > 0 ? audioFiles.length + ' audio file(s) selected' : ''"
/>
<input
id="index_input_file"
@@ -444,7 +337,7 @@
multiple
accept=".txt,.md,.pdf"
style="display: none;"
@change="textFiles = Array.from($event.target.files); handleFileSelection($event.target.files, 'file')"
@change="textFiles = Array.from($event.target.files); fileName = textFiles.length > 0 ? textFiles.length + ' file(s) selected' : ''"
/>
</div>
@@ -541,20 +434,12 @@ function startChat(event) {
return;
}
// Get MCP mode from checkbox (if available)
let mcpMode = false;
const mcpToggle = document.getElementById('index_mcp_toggle');
if (mcpToggle && mcpToggle.checked) {
mcpMode = true;
}
// Store message and files in localStorage for chat page to pick up
const chatData = {
message: message,
imageFiles: [],
audioFiles: [],
textFiles: [],
mcpMode: mcpMode
textFiles: []
};
// Convert files to base64 for storage

View File

@@ -66,14 +66,6 @@
<i class="fas fa-cogs mr-1.5 text-[10px]"></i>
<span>Backend Gallery</span>
</a>
{{ if not .DisableRuntimeSettings }}
<a href="/settings"
class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#38BDF8]/20 text-[#E5E7EB] py-1.5 px-3 rounded text-xs font-medium transition-colors">
<i class="fas fa-cog mr-1.5 text-[10px]"></i>
<span>Settings</span>
</a>
{{ end }}
</div>
<!-- Models Section -->
@@ -287,22 +279,10 @@
<!-- Backends Section -->
<div class="mt-8">
<div class="mb-6">
<div class="flex items-center justify-between mb-1">
<h2 class="text-2xl font-semibold text-[#E5E7EB] flex items-center">
<i class="fas fa-cogs mr-2 text-[#8B5CF6] text-sm"></i>
Installed Backends
</h2>
{{ if gt (len .InstalledBackends) 0 }}
<button
@click="reinstallAllBackends()"
:disabled="reinstallingAll"
class="inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/80 disabled:opacity-50 disabled:cursor-not-allowed text-white py-1.5 px-3 rounded text-xs font-medium transition-colors"
title="Reinstall all backends">
<i class="fas fa-arrow-rotate-right mr-1.5 text-[10px]" :class="reinstallingAll ? 'fa-spin' : ''"></i>
<span x-text="reinstallingAll ? 'Reinstalling...' : 'Reinstall All'"></span>
</button>
{{ end }}
</div>
<h2 class="text-2xl font-semibold text-[#E5E7EB] mb-1 flex items-center">
<i class="fas fa-cogs mr-2 text-[#8B5CF6] text-sm"></i>
Installed Backends
</h2>
<p class="text-sm text-[#94A3B8] mb-4">
<span class="text-[#8B5CF6] font-medium">{{len .InstalledBackends}}</span> backend{{if gt (len .InstalledBackends) 1}}s{{end}} ready to use
</p>
@@ -344,7 +324,7 @@
</thead>
<tbody>
{{ range .InstalledBackends }}
<tr class="hover:bg-[#1E293B]/50 border-b border-[#1E293B] transition-colors" data-backend-name="{{.Name}}" data-is-system="{{.IsSystem}}">
<tr class="hover:bg-[#1E293B]/50 border-b border-[#1E293B] transition-colors">
<!-- Name Column -->
<td class="p-2">
<div class="flex items-center gap-2">
@@ -398,13 +378,6 @@
<td class="p-2">
<div class="flex items-center justify-end gap-1">
{{ if not .IsSystem }}
<button
@click="reinstallBackend('{{.Name}}')"
:disabled="reinstallingBackends['{{.Name}}']"
class="text-[#38BDF8]/60 hover:text-[#38BDF8] hover:bg-[#38BDF8]/10 disabled:opacity-50 disabled:cursor-not-allowed rounded p-1 transition-colors"
title="Reinstall {{.Name}}">
<i class="fas fa-arrow-rotate-right text-xs" :class="reinstallingBackends['{{.Name}}'] ? 'fa-spin' : ''"></i>
</button>
<button
@click="deleteBackend('{{.Name}}')"
class="text-red-400/60 hover:text-red-400 hover:bg-red-500/10 rounded p-1 transition-colors"
@@ -433,13 +406,9 @@
function indexDashboard() {
return {
notifications: [],
reinstallingBackends: {},
reinstallingAll: false,
backendJobs: {},
init() {
// Poll for job progress every 600ms
setInterval(() => this.pollJobs(), 600);
// Initialize component
},
addNotification(message, type = 'success') {
@@ -453,137 +422,6 @@ function indexDashboard() {
this.notifications = this.notifications.filter(n => n.id !== id);
},
async reinstallBackend(backendName) {
if (this.reinstallingBackends[backendName]) {
return; // Already reinstalling
}
try {
this.reinstallingBackends[backendName] = true;
const response = await fetch(`/api/backends/install/${encodeURIComponent(backendName)}`, {
method: 'POST'
});
const data = await response.json();
if (response.ok && data.jobID) {
this.backendJobs[backendName] = data.jobID;
this.addNotification(`Reinstalling backend "${backendName}"...`, 'success');
} else {
this.reinstallingBackends[backendName] = false;
this.addNotification(`Failed to start reinstall: ${data.error || 'Unknown error'}`, 'error');
}
} catch (error) {
console.error('Error reinstalling backend:', error);
this.reinstallingBackends[backendName] = false;
this.addNotification(`Failed to reinstall backend: ${error.message}`, 'error');
}
},
async reinstallAllBackends() {
if (this.reinstallingAll) {
return; // Already reinstalling
}
if (!confirm('Are you sure you want to reinstall all backends? This may take some time.')) {
return;
}
this.reinstallingAll = true;
// Get all non-system backends from the page using data attributes
const backendRows = document.querySelectorAll('tr[data-backend-name]');
const backendsToReinstall = [];
backendRows.forEach(row => {
const backendName = row.getAttribute('data-backend-name');
const isSystem = row.getAttribute('data-is-system') === 'true';
if (backendName && !isSystem && !this.reinstallingBackends[backendName]) {
backendsToReinstall.push(backendName);
}
});
if (backendsToReinstall.length === 0) {
this.reinstallingAll = false;
this.addNotification('No backends available to reinstall', 'error');
return;
}
this.addNotification(`Starting reinstall of ${backendsToReinstall.length} backend(s)...`, 'success');
// Reinstall all backends sequentially to avoid overwhelming the system
for (const backendName of backendsToReinstall) {
await this.reinstallBackend(backendName);
// Small delay between installations
await new Promise(resolve => setTimeout(resolve, 500));
}
// Don't set reinstallingAll to false here - let pollJobs handle it when all jobs complete
// This allows the UI to show the batch operation is in progress
},
async pollJobs() {
for (const [backendName, jobID] of Object.entries(this.backendJobs)) {
try {
const response = await fetch(`/api/backends/job/${jobID}`);
const jobData = await response.json();
if (jobData.completed) {
delete this.backendJobs[backendName];
this.reinstallingBackends[backendName] = false;
this.addNotification(`Backend "${backendName}" reinstalled successfully!`, 'success');
// Only reload if not in batch mode and no other jobs are running
if (!this.reinstallingAll && Object.keys(this.backendJobs).length === 0) {
setTimeout(() => {
window.location.reload();
}, 1500);
}
}
if (jobData.error || (jobData.message && jobData.message.startsWith('error:'))) {
delete this.backendJobs[backendName];
this.reinstallingBackends[backendName] = false;
let errorMessage = 'Unknown error';
if (typeof jobData.error === 'string') {
errorMessage = jobData.error;
} else if (jobData.error && typeof jobData.error === 'object') {
const errorKeys = Object.keys(jobData.error);
if (errorKeys.length > 0) {
errorMessage = jobData.error.message || jobData.error.error || jobData.error.Error || JSON.stringify(jobData.error);
} else {
errorMessage = jobData.message || 'Unknown error';
}
} else if (jobData.message) {
errorMessage = jobData.message;
}
if (errorMessage.startsWith('error: ')) {
errorMessage = errorMessage.substring(7);
}
this.addNotification(`Error reinstalling backend "${backendName}": ${errorMessage}`, 'error');
// If batch mode and all jobs are done (completed or errored), reload
if (this.reinstallingAll && Object.keys(this.backendJobs).length === 0) {
this.reinstallingAll = false;
setTimeout(() => {
window.location.reload();
}, 2000);
}
}
} catch (error) {
console.error('Error polling job:', error);
}
}
// If batch mode completed and no jobs left, reload
if (this.reinstallingAll && Object.keys(this.backendJobs).length === 0) {
this.reinstallingAll = false;
setTimeout(() => {
window.location.reload();
}, 2000);
}
},
async deleteBackend(backendName) {
if (!confirm(`Are you sure you want to delete the backend "${backendName}"?`)) {
return;

View File

@@ -77,197 +77,18 @@
<!-- URI Input -->
<div>
<div class="flex items-center justify-between mb-2">
<label class="block text-sm font-medium text-[#94A3B8]">
<i class="fas fa-link mr-2"></i>Model URI
</label>
<div class="flex gap-2">
<a href="https://huggingface.co/models?search=gguf&sort=trending"
target="_blank"
class="text-xs px-3 py-1.5 rounded-lg bg-purple-600/20 hover:bg-purple-600/30 text-purple-300 border border-purple-500/30 transition-all flex items-center gap-1.5">
<i class="fab fa-huggingface"></i>
<span>Search GGUF Models on Hugging Face</span>
<i class="fas fa-external-link-alt text-xs"></i>
</a>
<a href="https://huggingface.co/models?sort=trending"
target="_blank"
class="text-xs px-3 py-1.5 rounded-lg bg-purple-600/20 hover:bg-purple-600/30 text-purple-300 border border-purple-500/30 transition-all flex items-center gap-1.5">
<i class="fab fa-huggingface"></i>
<span>Browse All Models on Hugging Face</span>
<i class="fas fa-external-link-alt text-xs"></i>
</a>
</div>
</div>
<label class="block text-sm font-medium text-[#94A3B8] mb-2">
<i class="fas fa-link mr-2"></i>Model URI
</label>
<input
x-model="importUri"
type="text"
placeholder="huggingface://TheBloke/Llama-2-7B-Chat-GGUF or https://example.com/model.gguf"
placeholder="https://example.com/model.gguf or file:///path/to/model.gguf"
class="w-full px-4 py-3 bg-[#101827] border border-[#1E293B] rounded-lg text-[#E5E7EB] focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-colors"
:disabled="isSubmitting">
<p class="mt-2 text-xs text-[#94A3B8]">
Enter the URI or path to the model file you want to import
</p>
<!-- URI Format Guide -->
<div class="mt-4" x-data="{ showGuide: false }">
<button @click="showGuide = !showGuide"
class="flex items-center gap-2 text-sm text-[#94A3B8] hover:text-[#E5E7EB] transition-colors">
<i class="fas" :class="showGuide ? 'fa-chevron-down' : 'fa-chevron-right'"></i>
<i class="fas fa-info-circle"></i>
<span>Supported URI Formats</span>
</button>
<div x-show="showGuide"
x-transition:enter="transition ease-out duration-200"
x-transition:enter-start="opacity-0 transform -translate-y-2"
x-transition:enter-end="opacity-100 transform translate-y-0"
class="mt-3 p-4 bg-[#101827] border border-[#1E293B] rounded-lg space-y-4">
<!-- HuggingFace -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fab fa-huggingface text-purple-400"></i>
HuggingFace
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">huggingface://</code><span class="text-[#94A3B8]">TheBloke/Llama-2-7B-Chat-GGUF</span>
<p class="text-[#6B7280] mt-0.5">Standard HuggingFace format</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">hf://</code><span class="text-[#94A3B8]">TheBloke/Llama-2-7B-Chat-GGUF</span>
<p class="text-[#6B7280] mt-0.5">Short HuggingFace format</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">https://huggingface.co/</code><span class="text-[#94A3B8]">TheBloke/Llama-2-7B-Chat-GGUF</span>
<p class="text-[#6B7280] mt-0.5">Full HuggingFace URL</p>
</div>
</div>
</div>
</div>
<!-- HTTP/HTTPS -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-globe text-blue-400"></i>
HTTP/HTTPS URLs
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">https://</code><span class="text-[#94A3B8]">example.com/model.gguf</span>
<p class="text-[#6B7280] mt-0.5">Direct download from any HTTPS URL</p>
</div>
</div>
</div>
</div>
<!-- Local Files -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-file text-yellow-400"></i>
Local Files
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">file://</code><span class="text-[#94A3B8]">/path/to/model.gguf</span>
<p class="text-[#6B7280] mt-0.5">Local file path (absolute)</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#94A3B8]">/path/to/model.yaml</code>
<p class="text-[#6B7280] mt-0.5">Direct local YAML config file</p>
</div>
</div>
</div>
</div>
<!-- OCI -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-box text-cyan-400"></i>
OCI Registry
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">oci://</code><span class="text-[#94A3B8]">registry.example.com/model:tag</span>
<p class="text-[#6B7280] mt-0.5">OCI container registry</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">ocifile://</code><span class="text-[#94A3B8]">/path/to/image.tar</span>
<p class="text-[#6B7280] mt-0.5">Local OCI tarball file</p>
</div>
</div>
</div>
</div>
<!-- Ollama -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-cube text-indigo-400"></i>
Ollama
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">ollama://</code><span class="text-[#94A3B8]">llama2:7b</span>
<p class="text-[#6B7280] mt-0.5">Ollama model format</p>
</div>
</div>
</div>
</div>
<!-- YAML Config Files -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-code text-pink-400"></i>
YAML Configuration Files
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#94A3B8]">https://example.com/model.yaml</code>
<p class="text-[#6B7280] mt-0.5">Remote YAML config file</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#94A3B8]">file:///path/to/config.yaml</code>
<p class="text-[#6B7280] mt-0.5">Local YAML config file</p>
</div>
</div>
</div>
</div>
<div class="pt-2 mt-3 border-t border-[#1E293B]">
<p class="text-xs text-[#6B7280] italic">
<i class="fas fa-lightbulb mr-1.5 text-yellow-400"></i>
Tip: For HuggingFace models, you can use any of the three formats. The system will automatically detect and download the appropriate model files.
</p>
</div>
</div>
</div>
</div>
<!-- Preferences Section -->
@@ -299,7 +120,6 @@
<option value="mlx-vlm">mlx-vlm</option>
<option value="transformers">transformers</option>
<option value="vllm">vllm</option>
<option value="diffusers">diffusers</option>
</select>
<p class="mt-1 text-xs text-gray-400">
Force a specific backend. Leave empty to auto-detect from URI.
@@ -402,71 +222,6 @@
Model type for transformers backend. Examples: AutoModelForCausalLM, SentenceTransformer, Mamba, MusicgenForConditionalGeneration. Leave empty to use default (AutoModelForCausalLM).
</p>
</div>
<!-- Pipeline Type (Diffusers) -->
<div x-show="commonPreferences.backend === 'diffusers'">
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-stream mr-2"></i>Pipeline Type
</label>
<input
x-model="commonPreferences.pipeline_type"
type="text"
placeholder="StableDiffusionPipeline (for diffusers backend)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Pipeline type for diffusers backend. Examples: StableDiffusionPipeline, StableDiffusion3Pipeline, FluxPipeline. Leave empty to use default (StableDiffusionPipeline).
</p>
</div>
<!-- Scheduler Type (Diffusers) -->
<div x-show="commonPreferences.backend === 'diffusers'">
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-clock mr-2"></i>Scheduler Type
</label>
<input
x-model="commonPreferences.scheduler_type"
type="text"
placeholder="k_dpmpp_2m (optional)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Scheduler type for diffusers backend. Examples: k_dpmpp_2m, euler_a, ddim. Leave empty to use model default.
</p>
</div>
<!-- Enable Parameters (Diffusers) -->
<div x-show="commonPreferences.backend === 'diffusers'">
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-cogs mr-2"></i>Enable Parameters
</label>
<input
x-model="commonPreferences.enable_parameters"
type="text"
placeholder="negative_prompt,num_inference_steps (comma-separated)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Enabled parameters for diffusers backend (comma-separated). Leave empty to use default (negative_prompt,num_inference_steps).
</p>
</div>
<!-- CUDA (Diffusers) -->
<div x-show="commonPreferences.backend === 'diffusers'">
<label class="flex items-center cursor-pointer">
<input
x-model="commonPreferences.cuda"
type="checkbox"
class="w-5 h-5 rounded bg-gray-900/90 border-gray-700/70 text-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all cursor-pointer"
:disabled="isSubmitting">
<span class="ml-3 text-sm font-medium text-gray-300">
<i class="fas fa-microchip mr-2"></i>CUDA
</span>
</label>
<p class="mt-1 ml-8 text-xs text-gray-400">
Enable CUDA support for GPU acceleration with diffusers backend.
</p>
</div>
</div>
<!-- Custom Preferences -->
@@ -724,11 +479,7 @@ function importModel() {
quantizations: '',
mmproj_quantizations: '',
embeddings: false,
type: '',
pipeline_type: '',
scheduler_type: '',
enable_parameters: '',
cuda: false
type: ''
},
isSubmitting: false,
currentJobId: null,
@@ -803,18 +554,6 @@ function importModel() {
if (this.commonPreferences.type && this.commonPreferences.type.trim()) {
prefsObj.type = this.commonPreferences.type.trim();
}
if (this.commonPreferences.pipeline_type && this.commonPreferences.pipeline_type.trim()) {
prefsObj.pipeline_type = this.commonPreferences.pipeline_type.trim();
}
if (this.commonPreferences.scheduler_type && this.commonPreferences.scheduler_type.trim()) {
prefsObj.scheduler_type = this.commonPreferences.scheduler_type.trim();
}
if (this.commonPreferences.enable_parameters && this.commonPreferences.enable_parameters.trim()) {
prefsObj.enable_parameters = this.commonPreferences.enable_parameters.trim();
}
if (this.commonPreferences.cuda) {
prefsObj.cuda = true;
}
// Add custom preferences (can override common ones)
this.preferences.forEach(pref => {
@@ -890,33 +629,11 @@ function importModel() {
setTimeout(() => {
window.location.reload();
}, 2000);
} else if (jobData.error || (jobData.message && jobData.message.startsWith('error:'))) {
} else if (jobData.error) {
clearInterval(this.jobPollInterval);
this.isSubmitting = false;
this.currentJobId = null;
// Extract error message - handle both string and object errors
let errorMessage = 'Unknown error';
if (typeof jobData.error === 'string') {
errorMessage = jobData.error;
} else if (jobData.error && typeof jobData.error === 'object') {
// Check if error object has any properties
const errorKeys = Object.keys(jobData.error);
if (errorKeys.length > 0) {
// Try common error object properties
errorMessage = jobData.error.message || jobData.error.error || jobData.error.Error || JSON.stringify(jobData.error);
} else {
// Empty object {}, fall back to message field
errorMessage = jobData.message || 'Unknown error';
}
} else if (jobData.message) {
// Use message field if error is not present or is empty
errorMessage = jobData.message;
}
// Remove "error: " prefix if present
if (errorMessage.startsWith('error: ')) {
errorMessage = errorMessage.substring(7);
}
this.showAlert('error', 'Import failed: ' + errorMessage);
this.showAlert('error', 'Import failed: ' + jobData.error);
}
} catch (error) {
console.error('Error polling job status:', error);

View File

@@ -714,33 +714,11 @@ function modelsGallery() {
this.fetchModels();
}
if (jobData.error || (jobData.message && jobData.message.startsWith('error:'))) {
if (jobData.error) {
model.processing = false;
delete this.jobProgress[model.jobID];
const action = model.isDeletion ? 'deleting' : 'installing';
// Extract error message - handle both string and object errors
let errorMessage = 'Unknown error';
if (typeof jobData.error === 'string') {
errorMessage = jobData.error;
} else if (jobData.error && typeof jobData.error === 'object') {
// Check if error object has any properties
const errorKeys = Object.keys(jobData.error);
if (errorKeys.length > 0) {
// Try common error object properties
errorMessage = jobData.error.message || jobData.error.error || jobData.error.Error || JSON.stringify(jobData.error);
} else {
// Empty object {}, fall back to message field
errorMessage = jobData.message || 'Unknown error';
}
} else if (jobData.message) {
// Use message field if error is not present or is empty
errorMessage = jobData.message;
}
// Remove "error: " prefix if present
if (errorMessage.startsWith('error: ')) {
errorMessage = errorMessage.substring(7);
}
this.addNotification(`Error ${action} model "${model.name}": ${errorMessage}`, 'error');
this.addNotification(`Error ${action} model "${model.name}": ${jobData.error}`, 'error');
}
} catch (error) {
console.error('Error polling job:', error);

View File

@@ -1,12 +1,12 @@
<nav class="bg-[#101827] shadow-2xl border-b border-[#1E293B]">
<div class="container mx-auto px-4 py-2">
<div class="container mx-auto px-4 py-3">
<div class="flex items-center justify-between">
<div class="flex items-center">
<!-- Logo Image -->
<a href="./" class="flex items-center group">
<img src="static/logo_horizontal.png"
alt="LocalAI Logo"
class="h-10 mr-3 brightness-110 transition-all duration-300 group-hover:brightness-125 group-hover:drop-shadow-[0_0_8px_rgba(56,189,248,0.5)]">
class="h-14 mr-3 brightness-110 transition-all duration-300 group-hover:brightness-125 group-hover:drop-shadow-[0_0_8px_rgba(56,189,248,0.5)]">
</a>
</div>

View File

@@ -1,653 +0,0 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<body class="bg-[#101827] text-[#E5E7EB]">
<div class="flex flex-col min-h-screen" x-data="settingsDashboard()">
{{template "views/partials/navbar" .}}
<!-- Notifications -->
<div class="fixed top-20 right-4 z-50 space-y-2" style="max-width: 400px;">
<template x-for="notification in notifications" :key="notification.id">
<div x-show="true"
x-transition:enter="transition ease-out duration-200"
x-transition:enter-start="opacity-0"
x-transition:enter-end="opacity-100"
x-transition:leave="transition ease-in duration-150"
x-transition:leave-start="opacity-100"
x-transition:leave-end="opacity-0"
:class="notification.type === 'error' ? 'bg-red-500' : 'bg-green-500'"
class="rounded-lg p-4 text-white flex items-start space-x-3">
<div class="flex-shrink-0">
<i :class="notification.type === 'error' ? 'fas fa-exclamation-circle' : 'fas fa-check-circle'" class="text-xl"></i>
</div>
<div class="flex-1 min-w-0">
<p class="text-sm font-medium break-words" x-text="notification.message"></p>
</div>
<button @click="dismissNotification(notification.id)" class="flex-shrink-0 text-white hover:opacity-80 transition-opacity">
<i class="fas fa-times"></i>
</button>
</div>
</template>
</div>
<div class="container mx-auto px-4 py-6 flex-grow max-w-4xl">
<!-- Header -->
<div class="mb-6">
<div class="flex items-center justify-between mb-2">
<h1 class="text-2xl font-semibold text-[#E5E7EB]">
Application Settings
</h1>
<a href="/manage"
class="inline-flex items-center text-[#94A3B8] hover:text-[#E5E7EB] transition-colors">
<i class="fas fa-arrow-left mr-2 text-sm"></i>
<span class="text-sm">Back to Manage</span>
</a>
</div>
<p class="text-sm text-[#94A3B8]">Configure watchdog and backend request settings</p>
</div>
<!-- Settings Form -->
<form @submit.prevent="saveSettings()" class="space-y-6">
<!-- Watchdog Settings Section -->
<div class="bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-shield-alt mr-2 text-[#38BDF8] text-sm"></i>
Watchdog Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure automatic monitoring and management of backend processes
</p>
<div class="space-y-4">
<!-- Enable Watchdog -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable Watchdog</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable automatic monitoring of backend processes</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.watchdog_enabled"
@change="updateWatchdogEnabled()"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
<!-- Enable Idle Check -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable Idle Check</label>
<p class="text-xs text-[#94A3B8] mt-1">Automatically stop backends that are idle for too long</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.watchdog_idle_enabled"
:disabled="!settings.watchdog_enabled"
class="sr-only peer" :class="!settings.watchdog_enabled ? 'opacity-50' : ''">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
<!-- Idle Timeout -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Idle Timeout</label>
<p class="text-xs text-[#94A3B8] mb-2">Time before an idle backend is stopped (e.g., 15m, 1h)</p>
<input type="text" x-model="settings.watchdog_idle_timeout"
:disabled="!settings.watchdog_idle_enabled"
placeholder="15m"
class="w-full px-3 py-2 bg-[#101827] border border-[#38BDF8]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#38BDF8]/50"
:class="!settings.watchdog_idle_enabled ? 'opacity-50 cursor-not-allowed' : ''">
</div>
<!-- Enable Busy Check -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable Busy Check</label>
<p class="text-xs text-[#94A3B8] mt-1">Automatically stop backends that are busy for too long (stuck processes)</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.watchdog_busy_enabled"
:disabled="!settings.watchdog_enabled"
class="sr-only peer" :class="!settings.watchdog_enabled ? 'opacity-50' : ''">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
<!-- Busy Timeout -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Busy Timeout</label>
<p class="text-xs text-[#94A3B8] mb-2">Time before a busy backend is stopped (e.g., 5m, 30m)</p>
<input type="text" x-model="settings.watchdog_busy_timeout"
:disabled="!settings.watchdog_busy_enabled"
placeholder="5m"
class="w-full px-3 py-2 bg-[#101827] border border-[#38BDF8]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#38BDF8]/50"
:class="!settings.watchdog_busy_enabled ? 'opacity-50 cursor-not-allowed' : ''">
</div>
</div>
</div>
<!-- Backend Request Settings Section -->
<div class="bg-[#1E293B] border border-[#8B5CF6]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-cogs mr-2 text-[#8B5CF6] text-sm"></i>
Backend Request Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure how backends handle multiple requests
</p>
<div class="space-y-4">
<!-- Single Backend Mode -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Single Backend Mode</label>
<p class="text-xs text-[#94A3B8] mt-1">Allow only one backend to be active at a time</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.single_backend"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#8B5CF6]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#8B5CF6]"></div>
</label>
</div>
<!-- Parallel Backend Requests -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Parallel Backend Requests</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable backends to handle multiple requests in parallel (if supported)</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.parallel_backend_requests"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#8B5CF6]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#8B5CF6]"></div>
</label>
</div>
</div>
</div>
<!-- Performance Settings Section -->
<div class="bg-[#1E293B] border border-[#10B981]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-tachometer-alt mr-2 text-[#10B981] text-sm"></i>
Performance Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure default performance parameters for models
</p>
<div class="space-y-4">
<!-- Threads -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Default Threads</label>
<p class="text-xs text-[#94A3B8] mb-2">Number of threads to use for model inference (0 = auto)</p>
<input type="number" x-model="settings.threads"
min="0"
placeholder="0"
class="w-full px-3 py-2 bg-[#101827] border border-[#10B981]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#10B981]/50">
</div>
<!-- Context Size -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Default Context Size</label>
<p class="text-xs text-[#94A3B8] mb-2">Default context window size for models</p>
<input type="number" x-model="settings.context_size"
min="0"
placeholder="512"
class="w-full px-3 py-2 bg-[#101827] border border-[#10B981]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#10B981]/50">
</div>
<!-- F16 -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">F16 Precision</label>
<p class="text-xs text-[#94A3B8] mt-1">Use 16-bit floating point precision</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.f16"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#10B981]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#10B981]"></div>
</label>
</div>
<!-- Debug -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Debug Mode</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable debug logging</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.debug"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#10B981]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#10B981]"></div>
</label>
</div>
</div>
</div>
<!-- API Settings Section -->
<div class="bg-[#1E293B] border border-[#F59E0B]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-globe mr-2 text-[#F59E0B] text-sm"></i>
API Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure CORS and CSRF protection
</p>
<div class="space-y-4">
<!-- CORS -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable CORS</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable Cross-Origin Resource Sharing</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.cors"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#F59E0B]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#F59E0B]"></div>
</label>
</div>
<!-- CORS Allow Origins -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">CORS Allow Origins</label>
<p class="text-xs text-[#94A3B8] mb-2">Comma-separated list of allowed origins</p>
<input type="text" x-model="settings.cors_allow_origins"
placeholder="*"
class="w-full px-3 py-2 bg-[#101827] border border-[#F59E0B]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#F59E0B]/50">
</div>
<!-- CSRF -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable CSRF Protection</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable Cross-Site Request Forgery protection</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.csrf"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#F59E0B]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#F59E0B]"></div>
</label>
</div>
</div>
</div>
<!-- P2P Settings Section -->
<div class="bg-[#1E293B] border border-[#EC4899]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-network-wired mr-2 text-[#EC4899] text-sm"></i>
P2P Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure peer-to-peer networking
</p>
<div class="space-y-4">
<!-- P2P Token -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">P2P Token</label>
<p class="text-xs text-[#94A3B8] mb-2">Authentication token for P2P network (set to 0 to generate a new token)</p>
<input type="text" x-model="settings.p2p_token"
placeholder=""
class="w-full px-3 py-2 bg-[#101827] border border-[#EC4899]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#EC4899]/50">
</div>
<!-- P2P Network ID -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">P2P Network ID</label>
<p class="text-xs text-[#94A3B8] mb-2">Network identifier for P2P connections</p>
<input type="text" x-model="settings.p2p_network_id"
placeholder=""
class="w-full px-3 py-2 bg-[#101827] border border-[#EC4899]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#EC4899]/50">
</div>
<!-- Federated -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Federated Mode</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable federated instance mode</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.federated"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#EC4899]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#EC4899]"></div>
</label>
</div>
</div>
</div>
<!-- API Keys Settings Section -->
<div class="bg-[#1E293B] border border-[#EF4444]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-key mr-2 text-[#EF4444] text-sm"></i>
API Keys
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Manage API keys for authentication. Keys from environment variables are always included.
</p>
<div class="space-y-4">
<!-- API Keys List -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">API Keys</label>
<p class="text-xs text-[#94A3B8] mb-2">List of API keys (one per line or comma-separated)</p>
<textarea x-model="settings.api_keys_text"
rows="4"
placeholder="sk-1234567890abcdef&#10;sk-0987654321fedcba"
class="w-full px-3 py-2 bg-[#101827] border border-[#EF4444]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#EF4444]/50"></textarea>
<p class="text-xs text-[#94A3B8] mt-1">Note: API keys are sensitive. Handle with care.</p>
</div>
</div>
</div>
<!-- Gallery Settings Section -->
<div class="bg-[#1E293B] border border-[#6366F1]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-images mr-2 text-[#6366F1] text-sm"></i>
Gallery Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure model and backend galleries
</p>
<div class="space-y-4">
<!-- Autoload Galleries -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Autoload Galleries</label>
<p class="text-xs text-[#94A3B8] mt-1">Automatically load model galleries on startup</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.autoload_galleries"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#6366F1]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#6366F1]"></div>
</label>
</div>
<!-- Autoload Backend Galleries -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Autoload Backend Galleries</label>
<p class="text-xs text-[#94A3B8] mt-1">Automatically load backend galleries on startup</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.autoload_backend_galleries"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#6366F1]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#6366F1]"></div>
</label>
</div>
<!-- Galleries (JSON) -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Model Galleries (JSON)</label>
<p class="text-xs text-[#94A3B8] mb-2">Array of gallery objects with 'url' and 'name' fields</p>
<textarea x-model="settings.galleries_json"
rows="4"
placeholder='[{"url": "https://example.com", "name": "Example Gallery"}]'
class="w-full px-3 py-2 bg-[#101827] border border-[#6366F1]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#6366F1]/50"></textarea>
</div>
<!-- Backend Galleries (JSON) -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Backend Galleries (JSON)</label>
<p class="text-xs text-[#94A3B8] mb-2">Array of backend gallery objects with 'url' and 'name' fields</p>
<textarea x-model="settings.backend_galleries_json"
rows="4"
placeholder='[{"url": "https://example.com", "name": "Example Backend Gallery"}]'
class="w-full px-3 py-2 bg-[#101827] border border-[#6366F1]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#6366F1]/50"></textarea>
</div>
</div>
</div>
<!-- Source Info -->
<div class="bg-yellow-500/10 border border-yellow-500/20 rounded-lg p-4" x-show="sourceInfo">
<div class="flex items-start">
<i class="fas fa-info-circle text-yellow-400 mr-2 mt-0.5"></i>
<div class="flex-1">
<p class="text-sm text-yellow-300 font-medium mb-1">Configuration Source</p>
<p class="text-xs text-yellow-200" x-text="'Settings are currently loaded from: ' + sourceInfo"></p>
<p class="text-xs text-yellow-200 mt-1" x-show="sourceInfo === 'env'">
Environment variables take precedence. To modify settings via the UI, unset the relevant environment variables first.
</p>
</div>
</div>
</div>
<!-- Save Button -->
<div class="flex justify-end">
<button type="submit"
:disabled="saving"
class="inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/90 disabled:opacity-50 disabled:cursor-not-allowed text-white py-2 px-6 rounded-lg font-medium transition-colors">
<i class="fas fa-save mr-2" :class="saving ? 'fa-spin fa-spinner' : ''"></i>
<span x-text="saving ? 'Saving...' : 'Save Settings'"></span>
</button>
</div>
</form>
</div>
{{template "views/partials/footer" .}}
</div>
<script>
function settingsDashboard() {
return {
notifications: [],
settings: {
watchdog_enabled: false,
watchdog_idle_enabled: false,
watchdog_busy_enabled: false,
watchdog_idle_timeout: '15m',
watchdog_busy_timeout: '5m',
single_backend: false,
parallel_backend_requests: false,
threads: 0,
context_size: 0,
f16: false,
debug: false,
cors: false,
csrf: false,
cors_allow_origins: '',
p2p_token: '',
p2p_network_id: '',
federated: false,
autoload_galleries: false,
autoload_backend_galleries: false,
galleries_json: '[]',
backend_galleries_json: '[]',
api_keys_text: ''
},
sourceInfo: '',
saving: false,
init() {
this.loadSettings();
},
async loadSettings() {
try {
const response = await fetch('/api/settings');
const data = await response.json();
if (response.ok) {
this.settings = {
watchdog_enabled: data.watchdog_enabled,
watchdog_idle_enabled: data.watchdog_idle_enabled,
watchdog_busy_enabled: data.watchdog_busy_enabled,
watchdog_idle_timeout: data.watchdog_idle_timeout || '15m',
watchdog_busy_timeout: data.watchdog_busy_timeout || '5m',
single_backend: data.single_backend,
parallel_backend_requests: data.parallel_backend_requests,
threads: data.threads || 0,
context_size: data.context_size || 0,
f16: data.f16 || false,
debug: data.debug || false,
cors: data.cors || false,
csrf: data.csrf || false,
cors_allow_origins: data.cors_allow_origins || '',
p2p_token: data.p2p_token || '',
p2p_network_id: data.p2p_network_id || '',
federated: data.federated || false,
autoload_galleries: data.autoload_galleries || false,
autoload_backend_galleries: data.autoload_backend_galleries || false,
galleries_json: JSON.stringify(data.galleries || [], null, 2),
backend_galleries_json: JSON.stringify(data.backend_galleries || [], null, 2),
api_keys_text: (data.api_keys || []).join('\n')
};
this.sourceInfo = data.source || 'default';
} else {
this.addNotification('Failed to load settings: ' + (data.error || 'Unknown error'), 'error');
}
} catch (error) {
console.error('Error loading settings:', error);
this.addNotification('Failed to load settings: ' + error.message, 'error');
}
},
updateWatchdogEnabled() {
if (!this.settings.watchdog_enabled) {
this.settings.watchdog_idle_enabled = false;
this.settings.watchdog_busy_enabled = false;
}
},
async saveSettings() {
if (this.saving) return;
this.saving = true;
try {
const payload = {};
// Only include changed values
if (this.settings.watchdog_enabled !== undefined) {
payload.watchdog_enabled = this.settings.watchdog_enabled;
}
if (this.settings.watchdog_idle_enabled !== undefined) {
payload.watchdog_idle_enabled = this.settings.watchdog_idle_enabled;
}
if (this.settings.watchdog_busy_enabled !== undefined) {
payload.watchdog_busy_enabled = this.settings.watchdog_busy_enabled;
}
if (this.settings.watchdog_idle_timeout) {
payload.watchdog_idle_timeout = this.settings.watchdog_idle_timeout;
}
if (this.settings.watchdog_busy_timeout) {
payload.watchdog_busy_timeout = this.settings.watchdog_busy_timeout;
}
if (this.settings.single_backend !== undefined) {
payload.single_backend = this.settings.single_backend;
}
if (this.settings.parallel_backend_requests !== undefined) {
payload.parallel_backend_requests = this.settings.parallel_backend_requests;
}
if (this.settings.threads !== undefined) {
payload.threads = parseInt(this.settings.threads) || 0;
}
if (this.settings.context_size !== undefined) {
payload.context_size = parseInt(this.settings.context_size) || 0;
}
if (this.settings.f16 !== undefined) {
payload.f16 = this.settings.f16;
}
if (this.settings.debug !== undefined) {
payload.debug = this.settings.debug;
}
if (this.settings.cors !== undefined) {
payload.cors = this.settings.cors;
}
if (this.settings.csrf !== undefined) {
payload.csrf = this.settings.csrf;
}
if (this.settings.cors_allow_origins !== undefined) {
payload.cors_allow_origins = this.settings.cors_allow_origins;
}
if (this.settings.p2p_token !== undefined) {
payload.p2p_token = this.settings.p2p_token;
}
if (this.settings.p2p_network_id !== undefined) {
payload.p2p_network_id = this.settings.p2p_network_id;
}
if (this.settings.federated !== undefined) {
payload.federated = this.settings.federated;
}
if (this.settings.autoload_galleries !== undefined) {
payload.autoload_galleries = this.settings.autoload_galleries;
}
if (this.settings.autoload_backend_galleries !== undefined) {
payload.autoload_backend_galleries = this.settings.autoload_backend_galleries;
}
// Parse API keys from text (split by newline or comma, trim whitespace, filter empty)
if (this.settings.api_keys_text !== undefined) {
const keys = this.settings.api_keys_text
.split(/[\n,]/)
.map(k => k.trim())
.filter(k => k.length > 0);
if (keys.length > 0) {
payload.api_keys = keys;
} else {
// If empty, send empty array to clear keys
payload.api_keys = [];
}
}
// Parse galleries JSON
if (this.settings.galleries_json) {
try {
payload.galleries = JSON.parse(this.settings.galleries_json);
} catch (e) {
this.addNotification('Invalid galleries JSON: ' + e.message, 'error');
this.saving = false;
return;
}
}
if (this.settings.backend_galleries_json) {
try {
payload.backend_galleries = JSON.parse(this.settings.backend_galleries_json);
} catch (e) {
this.addNotification('Invalid backend galleries JSON: ' + e.message, 'error');
this.saving = false;
return;
}
}
const response = await fetch('/api/settings', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(payload)
});
const data = await response.json();
if (response.ok && data.success) {
this.addNotification('Settings saved successfully!', 'success');
// Reload settings to get updated source info
setTimeout(() => this.loadSettings(), 1000);
} else {
this.addNotification('Failed to save settings: ' + (data.error || 'Unknown error'), 'error');
}
} catch (error) {
console.error('Error saving settings:', error);
this.addNotification('Failed to save settings: ' + error.message, 'error');
} finally {
this.saving = false;
}
},
addNotification(message, type = 'success') {
const id = Date.now();
this.notifications.push({ id, message, type });
setTimeout(() => this.dismissNotification(id), 5000);
},
dismissNotification(id) {
this.notifications = this.notifications.filter(n => n.id !== id);
}
}
}
</script>
</body>
</html>

View File

@@ -34,14 +34,15 @@
<div class="border-b border-[#1E293B] p-5">
<div class="flex flex-col sm:flex-row items-center justify-between gap-4">
<!-- Model Selection -->
<div class="flex items-center" x-data="{ link : '{{ if .Model }}tts/{{.Model}}{{ end }}' }">
<div class="flex items-center">
<label for="model-select" class="mr-3 text-[#94A3B8] font-medium">
<i class="fas fa-microphone-lines text-[#8B5CF6] mr-2"></i>Model:
</label>
<select
<select
id="model-select"
x-model="link"
@change="window.location = link"
x-data="{ link : '' }"
x-model="link"
x-init="$watch('link', value => window.location = link)"
class="bg-[#101827] text-[#E5E7EB] border border-[#1E293B] focus:border-[#8B5CF6] focus:ring-2 focus:ring-[#8B5CF6]/50 rounded-lg shadow-sm p-2.5 appearance-none"
>
<option value="" disabled class="text-[#94A3B8]">Select a model</option>

102
core/p2p/sync.go Normal file
View File

@@ -0,0 +1,102 @@
package p2p
import (
"context"
"slices"
"time"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/edgevpn/pkg/node"
zlog "github.com/rs/zerolog/log"
)
func syncState(ctx context.Context, n *node.Node, app *application.Application) error {
zlog.Debug().Msg("[p2p-sync] Syncing state")
whatWeHave := []string{}
for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() {
whatWeHave = append(whatWeHave, model.Name)
}
ledger, _ := n.Ledger()
currentData := ledger.CurrentData()
zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData)
data, exists := ledger.GetKey("shared_state", "models")
if !exists {
ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave)
zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave)
}
models := []string{}
if err := data.Unmarshal(&models); err != nil {
zlog.Warn().Err(err).Msg("error unmarshalling models")
return nil
}
zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models)
// Sync with our state
whatIsNotThere := []string{}
for _, model := range whatWeHave {
if !slices.Contains(models, model) {
whatIsNotThere = append(whatIsNotThere, model)
}
}
if len(whatIsNotThere) > 0 {
zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...))
ledger.AnnounceUpdate(
ctx,
1*time.Minute,
"shared_state",
"models",
append(models, whatIsNotThere...),
)
}
// Check if we have a model that is not in our state, otherwise install it
for _, model := range models {
if slices.Contains(whatWeHave, model) {
zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model)
continue
}
// we install model
zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model)
uuid, err := uuid.NewUUID()
if err != nil {
zlog.Error().Err(err).Msg("error generating UUID")
continue
}
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uuid.String(),
GalleryElementName: model,
Galleries: app.ApplicationConfig().Galleries,
BackendGalleries: app.ApplicationConfig().BackendGalleries,
}
}
return nil
}
func Sync(ctx context.Context, n *node.Node, app *application.Application) error {
go func() {
for {
select {
case <-ctx.Done():
return
case <-time.After(1 * time.Minute):
if err := syncState(ctx, n, app); err != nil {
zlog.Error().Err(err).Msg("error syncing state")
}
}
}
}()
return nil
}

View File

@@ -85,7 +85,7 @@ func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel, galler
}
// Reload models
err = cl.LoadModelConfigsFromPath(systemState.Model.ModelsPath, g.appConfig.ToConfigLoaderOptions()...)
err = cl.LoadModelConfigsFromPath(systemState.Model.ModelsPath)
if err != nil {
return err
}

View File

@@ -5,6 +5,10 @@ import (
"encoding/json"
"errors"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/google/uuid"
@@ -12,10 +16,12 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/gallery/importers"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
const (
@@ -28,59 +34,178 @@ const (
func InstallModels(ctx context.Context, galleryService *services.GalleryService, galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
// create an error that groups all errors
var err error
installBackend := func(modelPath string) error {
// Then load the model file, and read the backend
modelYAML, e := os.ReadFile(modelPath)
if e != nil {
log.Error().Err(e).Str("filepath", modelPath).Msg("error reading model definition")
return e
}
var model config.ModelConfig
if e := yaml.Unmarshal(modelYAML, &model); e != nil {
log.Error().Err(e).Str("filepath", modelPath).Msg("error unmarshalling model definition")
return e
}
if model.Backend == "" {
log.Debug().Str("filepath", modelPath).Msg("no backend found in model definition")
return nil
}
if err := gallery.InstallBackendFromGallery(ctx, backendGalleries, systemState, modelLoader, model.Backend, downloadStatus, false); err != nil {
log.Error().Err(err).Str("backend", model.Backend).Msg("error installing backend")
return err
}
return nil
}
for _, url := range models {
// Check if it's a model gallery, or print a warning
e, found := installModel(ctx, galleries, backendGalleries, url, systemState, modelLoader, downloadStatus, enforceScan, autoloadBackendGalleries)
if e != nil && found {
log.Error().Err(err).Msgf("[startup] failed installing model '%s'", url)
err = errors.Join(err, e)
} else if !found {
log.Debug().Msgf("[startup] model not found in the gallery '%s'", url)
// As a best effort, try to resolve the model from the remote library
// if it's not resolved we try with the other method below
if galleryService == nil {
return fmt.Errorf("cannot start autoimporter, not sure how to handle this uri")
}
uri := downloader.URI(url)
// TODO: we should just use the discoverModelConfig here and default to this.
modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{})
if discoverErr != nil {
log.Error().Err(discoverErr).Msgf("[startup] failed to discover model config '%s'", url)
err = errors.Join(discoverErr, fmt.Errorf("failed to discover model config: %w", err))
continue
}
switch {
case uri.LooksLikeOCI():
log.Debug().Msgf("[startup] resolved OCI model to download: %s", url)
uuid, uuidErr := uuid.NewUUID()
if uuidErr != nil {
err = errors.Join(uuidErr, fmt.Errorf("failed to generate UUID: %w", uuidErr))
continue
}
// convert OCI image name to a file name.
ociName := strings.TrimPrefix(url, downloader.OCIPrefix)
ociName = strings.TrimPrefix(ociName, downloader.OllamaPrefix)
ociName = strings.ReplaceAll(ociName, "/", "__")
ociName = strings.ReplaceAll(ociName, ":", "__")
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: gallery.GalleryModel{
Overrides: map[string]interface{}{},
},
ID: uuid.String(),
GalleryElementName: modelConfig.Name,
GalleryElement: &modelConfig,
BackendGalleries: backendGalleries,
}
var status *services.GalleryOpStatus
// wait for op to finish
for {
status = galleryService.GetStatus(uuid.String())
if status != nil && status.Processed {
break
// check if file exists
if _, e := os.Stat(filepath.Join(systemState.Model.ModelsPath, ociName)); errors.Is(e, os.ErrNotExist) {
modelDefinitionFilePath := filepath.Join(systemState.Model.ModelsPath, ociName)
e := uri.DownloadFile(modelDefinitionFilePath, "", 0, 0, func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
})
if e != nil {
log.Error().Err(e).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
err = errors.Join(err, e)
}
time.Sleep(1 * time.Second)
}
if status.Error != nil {
log.Error().Err(status.Error).Msgf("[startup] failed to import model '%s' from '%s'", modelConfig.Name, url)
return status.Error
log.Info().Msgf("[startup] installed model from OCI repository: %s", ociName)
case uri.LooksLikeURL():
log.Debug().Msgf("[startup] downloading %s", url)
// Extract filename from URL
fileName, e := uri.FilenameFromUrl()
if e != nil {
log.Warn().Err(e).Str("url", url).Msg("error extracting filename from URL")
err = errors.Join(err, e)
continue
}
log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
modelPath := filepath.Join(systemState.Model.ModelsPath, fileName)
if e := utils.VerifyPath(fileName, modelPath); e != nil {
log.Error().Err(e).Str("filepath", modelPath).Msg("error verifying path")
err = errors.Join(err, e)
continue
}
// check if file exists
if _, e := os.Stat(modelPath); errors.Is(e, os.ErrNotExist) {
e := uri.DownloadFile(modelPath, "", 0, 0, func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
})
if e != nil {
log.Error().Err(e).Str("url", url).Str("filepath", modelPath).Msg("error downloading model")
err = errors.Join(err, e)
}
}
// Check if we have the backend installed
if autoloadBackendGalleries && path.Ext(modelPath) == YAML_EXTENSION {
if err := installBackend(modelPath); err != nil {
log.Error().Err(err).Str("filepath", modelPath).Msg("error installing backend")
}
}
default:
if _, e := os.Stat(url); e == nil {
log.Debug().Msgf("[startup] resolved local model: %s", url)
// copy to modelPath
md5Name := utils.MD5(url)
modelYAML, e := os.ReadFile(url)
if e != nil {
log.Error().Err(e).Str("filepath", url).Msg("error reading model definition")
err = errors.Join(err, e)
continue
}
modelDefinitionFilePath := filepath.Join(systemState.Model.ModelsPath, md5Name) + YAML_EXTENSION
if e := os.WriteFile(modelDefinitionFilePath, modelYAML, 0600); e != nil {
log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s")
err = errors.Join(err, e)
}
// Check if we have the backend installed
if autoloadBackendGalleries && path.Ext(modelDefinitionFilePath) == YAML_EXTENSION {
if err := installBackend(modelDefinitionFilePath); err != nil {
log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error installing backend")
}
}
} else {
// Check if it's a model gallery, or print a warning
e, found := installModel(ctx, galleries, backendGalleries, url, systemState, modelLoader, downloadStatus, enforceScan, autoloadBackendGalleries)
if e != nil && found {
log.Error().Err(err).Msgf("[startup] failed installing model '%s'", url)
err = errors.Join(err, e)
} else if !found {
log.Warn().Msgf("[startup] failed resolving model '%s'", url)
if galleryService == nil {
err = errors.Join(err, fmt.Errorf("cannot start autoimporter, not sure how to handle this uri"))
continue
}
// TODO: we should just use the discoverModelConfig here and default to this.
modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{})
if discoverErr != nil {
err = errors.Join(discoverErr, fmt.Errorf("failed to discover model config: %w", err))
continue
}
uuid, uuidErr := uuid.NewUUID()
if uuidErr != nil {
err = errors.Join(uuidErr, fmt.Errorf("failed to generate UUID: %w", uuidErr))
continue
}
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: gallery.GalleryModel{
Overrides: map[string]interface{}{},
},
ID: uuid.String(),
GalleryElementName: modelConfig.Name,
GalleryElement: &modelConfig,
BackendGalleries: backendGalleries,
}
var status *services.GalleryOpStatus
// wait for op to finish
for {
status = galleryService.GetStatus(uuid.String())
if status != nil && status.Processed {
break
}
time.Sleep(1 * time.Second)
}
if status.Error != nil {
return status.Error
}
log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
}
}
}
}
return err

View File

@@ -7,7 +7,6 @@ import (
"path/filepath"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
. "github.com/mudler/LocalAI/core/startup"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
@@ -20,11 +19,8 @@ var _ = Describe("Preload test", func() {
var tmpdir string
var systemState *system.SystemState
var ml *model.ModelLoader
var ctx context.Context
var cancel context.CancelFunc
BeforeEach(func() {
ctx, cancel = context.WithCancel(context.Background())
var err error
tmpdir, err = os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
@@ -33,24 +29,13 @@ var _ = Describe("Preload test", func() {
ml = model.NewModelLoader(systemState, true)
})
AfterEach(func() {
cancel()
})
Context("Preloading from strings", func() {
It("loads from embedded full-urls", func() {
url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")
galleryService := services.NewGalleryService(&config.ApplicationConfig{
SystemState: systemState,
}, ml)
galleryService.Start(ctx, config.NewModelConfigLoader(tmpdir), systemState)
InstallModels(context.TODO(), nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
err := InstallModels(ctx, galleryService, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, func(s1, s2, s3 string, f float64) {
fmt.Println(s1, s2, s3, f)
}, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)
content, err := os.ReadFile(resultFile)
@@ -62,22 +47,13 @@ var _ = Describe("Preload test", func() {
url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
galleryService := services.NewGalleryService(&config.ApplicationConfig{
SystemState: systemState,
}, ml)
galleryService.Start(ctx, config.NewModelConfigLoader(tmpdir), systemState)
err := InstallModels(ctx, galleryService, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, func(s1, s2, s3 string, f float64) {
fmt.Println(s1, s2, s3, f)
}, url)
err := InstallModels(context.TODO(), nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)
dirs, err := os.ReadDir(tmpdir)
Expect(err).ToNot(HaveOccurred())
_, err = os.Stat(resultFile)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("%+v", dirs))
Expect(err).ToNot(HaveOccurred())
})
})
})

208
docs/config.toml Normal file
View File

@@ -0,0 +1,208 @@
baseURL = "https://localai.io/"
languageCode = "en-GB"
contentDir = "content"
enableEmoji = true
enableGitInfo = true # N.B. .GitInfo does not currently function with git submodule content directories
defaultContentLanguage = 'en'
[markup]
defaultMarkdownHandler = "goldmark"
[markup.tableOfContents]
endLevel = 3
startLevel = 1
[markup.goldmark]
[markup.goldmark.renderer]
unsafe = true # https://jdhao.github.io/2019/12/29/hugo_html_not_shown/
# [markup.highlight]
# codeFences = false # disables Hugo's default syntax highlighting
# [markup.goldmark.parser]
# [markup.goldmark.parser.attribute]
# block = true
# title = true
[params]
google_fonts = [
["Inter", "300, 400, 600, 700"],
["Fira Code", "500, 700"]
]
sans_serif_font = "Inter" # Default is System font
secondary_font = "Inter" # Default is System font
mono_font = "Fira Code" # Default is System font
[params.footer]
copyright = "© 2023-2025 <a href='https://mudler.pm' target=_blank>Ettore Di Giacinto</a>"
version = true # includes git commit info
[params.social]
github = "mudler/LocalAI" # YOUR_GITHUB_ID or YOUR_GITHUB_URL
twitter = "LocalAI_API" # YOUR_TWITTER_ID
discord = "uJAeKSAGDy"
# instagram = "colinwilson" # YOUR_INSTAGRAM_ID
rss = true # show rss icon with link
[params.docs] # Parameters for the /docs 'template'
logo = "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/core/http/static/logo.png"
logo_text = ""
title = "LocalAI" # default html title for documentation pages/sections
pathName = "docs" # path name for documentation site | default "docs"
# themeColor = "cyan" # (optional) - Set theme accent colour. Options include: blue (default), green, red, yellow, emerald, cardinal, magenta, cyan
darkMode = true # enable dark mode option? default false
prism = true # enable syntax highlighting via Prism
prismTheme = "solarized-light" # (optional) - Set theme for PrismJS. Options include: lotusdocs (default), solarized-light, twilight, lucario
# gitinfo
repoURL = "https://github.com/mudler/LocalAI" # Git repository URL for your site [support for GitHub, GitLab, and BitBucket]
repoBranch = "master"
editPage = true # enable 'Edit this page' feature - default false
lastMod = true # enable 'Last modified' date on pages - default false
lastModRelative = true # format 'Last modified' time as relative - default true
sidebarIcons = true # enable sidebar icons? default false
breadcrumbs = true # default is true
backToTop = true # enable back-to-top button? default true
# ToC
toc = true # enable table of contents? default is true
tocMobile = true # enable table of contents in mobile view? default is true
scrollSpy = true # enable scrollspy on ToC? default is true
# front matter
descriptions = true # enable front matter descriptions under content title?
titleIcon = true # enable front matter icon title prefix? default is false
# content navigation
navDesc = true # include front matter descriptions in Prev/Next navigation cards
navDescTrunc = 30 # Number of characters by which to truncate the Prev/Next descriptions
listDescTrunc = 100 # Number of characters by which to truncate the list card description
# Link behaviour
intLinkTooltip = true # Enable a tooltip for internal links that displays info about the destination? default false
# extLinkNewTab = false # Open external links in a new Tab? default true
# logoLinkURL = "" # Set a custom URL destination for the top header logo link.
[params.flexsearch] # Parameters for FlexSearch
enabled = true
# tokenize = "full"
# optimize = true
# cache = 100
# minQueryChar = 3 # default is 0 (disabled)
# maxResult = 5 # default is 5
# searchSectionsIndex = []
[params.docsearch] # Parameters for DocSearch
# appID = "" # Algolia Application ID
# apiKey = "" # Algolia Search-Only API (Public) Key
# indexName = "" # Index Name to perform search on (or set env variable HUGO_PARAM_DOCSEARCH_indexName)
[params.analytics] # Parameters for Analytics (Google, Plausible)
# google = "G-XXXXXXXXXX" # Replace with your Google Analytics ID
# plausibleURL = "/docs/s" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleURL)
# plausibleAPI = "/docs/s" # optional - (or set via env variable HUGO_PARAM_ANALYTICS_plausibleAPI)
# plausibleDomain = "" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleDomain)
# [params.feedback]
# enabled = true
# emoticonTpl = true
# eventDest = ["plausible","google"]
# emoticonEventName = "Feedback"
# positiveEventName = "Positive Feedback"
# negativeEventName = "Negative Feedback"
# positiveFormTitle = "What did you like?"
# negativeFormTitle = "What went wrong?"
# successMsg = "Thank you for helping to improve Lotus Docs' documentation!"
# errorMsg = "Sorry! There was an error while attempting to submit your feedback!"
# positiveForm = [
# ["Accurate", "Accurately describes the feature or option."],
# ["Solved my problem", "Helped me resolve an issue."],
# ["Easy to understand", "Easy to follow and comprehend."],
# ["Something else"]
# ]
# negativeForm = [
# ["Inaccurate", "Doesn't accurately describe the feature or option."],
# ["Couldn't find what I was looking for", "Missing important information."],
# ["Hard to understand", "Too complicated or unclear."],
# ["Code sample errors", "One or more code samples are incorrect."],
# ["Something else"]
# ]
[menu]
[[menu.primary]]
name = "Docs"
url = "docs/"
identifier = "docs"
weight = 10
[[menu.primary]]
name = "Discord"
url = "https://discord.gg/uJAeKSAGDy"
identifier = "discord"
weight = 20
[languages]
[languages.en]
title = "LocalAI"
languageName = "English"
weight = 10
# [languages.fr]
# title = "LocalAI documentation"
# languageName = "Français"
# contentDir = "content/fr"
# weight = 20
# [languages.de]
# title = "LocalAI documentation"
# languageName = "Deutsch"
# contentDir = "content/de"
# weight = 30
# mounts are only needed in this showcase to access the publicly available screenshots;
# remove this section if you don't need further mounts
[module]
replacements = "github.com/colinwilson/lotusdocs -> lotusdocs"
[[module.mounts]]
source = 'archetypes'
target = 'archetypes'
[[module.mounts]]
source = 'assets'
target = 'assets'
[[module.mounts]]
source = 'content'
target = 'content'
[[module.mounts]]
source = 'data'
target = 'data'
[[module.mounts]]
source = 'i18n'
target = 'i18n'
[[module.mounts]]
source = '../images'
target = 'static/images'
[[module.mounts]]
source = 'layouts'
target = 'layouts'
[[module.mounts]]
source = 'static'
target = 'static'
# uncomment line below for temporary local development of module
# or when using a 'theme' as a git submodule
[[module.imports]]
path = "github.com/colinwilson/lotusdocs"
disable = false
[[module.imports]]
path = "github.com/gohugoio/hugo-mod-bootstrap-scss/v5"
disable = false

View File

@@ -1,61 +0,0 @@
+++
title = "LocalAI"
description = "The free, OpenAI, Anthropic alternative. Your All-in-One Complete AI Stack"
type = "home"
+++
**The free, OpenAI, Anthropic alternative. Your All-in-One Complete AI Stack** - Run powerful language models, autonomous agents, and document intelligence **locally** on your hardware.
**No cloud, no limits, no compromise.**
{{% notice tip %}}
**[⭐ Star us on GitHub](https://github.com/mudler/LocalAI)** - 33.3k+ stars and growing!
**Drop-in replacement for OpenAI API** - modular suite of tools that work seamlessly together or independently.
Start with **[LocalAI](https://localai.io)**'s OpenAI-compatible API, extend with **[LocalAGI](https://github.com/mudler/LocalAGI)**'s autonomous agents, and enhance with **[LocalRecall](https://github.com/mudler/LocalRecall)**'s semantic search - all running locally on your hardware.
**Open Source** MIT Licensed.
{{% /notice %}}
## Why Choose LocalAI?
**OpenAI API Compatible** - Run AI models locally with our modular ecosystem. From language models to autonomous agents and semantic search, build your complete AI stack without the cloud.
### Key Features
- **LLM Inferencing**: LocalAI is a free, **Open Source** OpenAI alternative. Run **LLMs**, generate **images**, **audio** and more **locally** with consumer grade hardware.
- **Agentic-first**: Extend LocalAI with LocalAGI, an autonomous AI agent platform that runs locally, no coding required. Build and deploy autonomous agents with ease.
- **Memory and Knowledge base**: Extend LocalAI with LocalRecall, A local rest api for semantic search and memory management. Perfect for AI applications.
- **OpenAI Compatible**: Drop-in replacement for OpenAI API. Compatible with existing applications and libraries.
- **No GPU Required**: Run on consumer grade hardware. No need for expensive GPUs or cloud services.
- **Multiple Models**: Support for various model families including LLMs, image generation, and audio models. Supports multiple backends for inferencing.
- **Privacy Focused**: Keep your data local. No data leaves your machine, ensuring complete privacy.
- **Easy Setup**: Simple installation and configuration. Get started in minutes with Binaries installation, Docker, Podman, Kubernetes or local installation.
- **Community Driven**: Active community support and regular updates. Contribute and help shape the future of LocalAI.
## Quick Start
**Docker is the recommended installation method** for most users:
```bash
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest
```
For complete installation instructions, see the [Installation guide](/installation/).
## Get Started
1. **[Install LocalAI](/installation/)** - Choose your installation method (Docker recommended)
2. **[Quickstart Guide](/getting-started/quickstart/)** - Get started quickly after installation
3. **[Install and Run Models](/getting-started/models/)** - Learn how to work with AI models
4. **[Try It Out](/getting-started/try-it-out/)** - Explore examples and use cases
## Learn More
- [Explore available models](https://models.localai.io)
- [Model compatibility](/model-compatibility/)
- [Try out examples](https://github.com/mudler/LocalAI-examples)
- [Join the community](https://discord.gg/uJAeKSAGDy)
- [Check the LocalAI Github repository](https://github.com/mudler/LocalAI)
- [Check the LocalAGI Github repository](https://github.com/mudler/LocalAGI)

View File

@@ -1,12 +0,0 @@
---
weight: 20
title: "Advanced"
description: "Advanced usage"
type: chapter
icon: settings
lead: ""
date: 2020-10-06T08:49:15+00:00
lastmod: 2020-10-06T08:49:15+00:00
draft: false
images: []
---

View File

@@ -0,0 +1,38 @@
+++
disableToc = false
title = "Advanced Configuration"
weight = 20
icon = "settings"
description = "Advanced configuration and optimization for LocalAI"
+++
This section covers advanced configuration, optimization, and fine-tuning options for LocalAI.
## Configuration
- **[Model Configuration]({{% relref "docs/advanced/model-configuration" %}})** - Complete model configuration reference
- **[Advanced Usage]({{% relref "docs/advanced/advanced-usage" %}})** - Advanced configuration options
- **[Installer Options]({{% relref "docs/advanced/installer" %}})** - Installer configuration and options
## Performance & Optimization
- **[Performance Tuning]({{% relref "docs/advanced/performance-tuning" %}})** - Optimize for maximum performance
- **[VRAM Management]({{% relref "docs/advanced/vram-management" %}})** - Manage GPU memory efficiently
## Specialized Topics
- **[Fine-tuning]({{% relref "docs/advanced/fine-tuning" %}})** - Fine-tune models for LocalAI
## Before You Begin
Make sure you have:
- LocalAI installed and running
- Basic understanding of YAML configuration
- Familiarity with your system's resources
## Related Documentation
- [Getting Started]({{% relref "docs/getting-started" %}}) - Installation and basics
- [Model Configuration]({{% relref "docs/advanced/model-configuration" %}}) - Configuration reference
- [Troubleshooting]({{% relref "docs/troubleshooting" %}}) - Common issues
- [Performance Tuning]({{% relref "docs/advanced/performance-tuning" %}}) - Optimization guide

View File

@@ -27,7 +27,7 @@ template:
chat: chat
```
For a complete reference of all available configuration options, see the [Model Configuration]({{%relref "advanced/model-configuration" %}}) page.
For a complete reference of all available configuration options, see the [Model Configuration]({{%relref "docs/advanced/model-configuration" %}}) page.
**Configuration File Locations:**
@@ -108,6 +108,7 @@ Similarly it can be specified a path to a YAML configuration file containing a l
```yaml
- url: https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml
name: gpt4all-j
# ...
```
### Automatic prompt caching
@@ -118,6 +119,7 @@ To enable prompt caching, you can control the settings in the model config YAML
```yaml
# Enable prompt caching
prompt_cache_path: "cache"
prompt_cache_all: true
@@ -129,18 +131,20 @@ prompt_cache_all: true
By default LocalAI will try to autoload the model by trying all the backends. This might work for most of models, but some of the backends are NOT configured to autoload.
The available backends are listed in the [model compatibility table]({{%relref "reference/compatibility-table" %}}).
The available backends are listed in the [model compatibility table]({{%relref "docs/reference/compatibility-table" %}}).
In order to specify a backend for your models, create a model config file in your `models` directory specifying the backend:
```yaml
name: gpt-3.5-turbo
# Default model parameters
parameters:
# Relative to the models path
model: ...
backend: llama-stable
# ...
```
### Connect external backends
@@ -179,6 +183,7 @@ make -C backend/python/vllm
When LocalAI runs in a container,
there are additional environment variables available that modify the behavior of LocalAI on startup:
{{< table "table-responsive" >}}
| Environment variable | Default | Description |
|----------------------------|---------|------------------------------------------------------------------------------------------------------------|
| `REBUILD` | `false` | Rebuild LocalAI on startup |
@@ -188,17 +193,20 @@ there are additional environment variables available that modify the behavior of
| `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the python environment on start |
| `DISABLE_AUTODETECT` | `false` | Disable autodetect of CPU flagset on start |
| `LLAMACPP_GRPC_SERVERS` | | A list of llama.cpp workers to distribute the workload. For example `LLAMACPP_GRPC_SERVERS="address1:port,address2:port"` |
{{< /table >}}
Here is how to configure these variables:
```bash
# Option 1: command line
docker run --env REBUILD=true localai
# Option 2: set within an env file
docker run --env-file .env localai
```
### CLI Parameters
For a complete reference of all CLI parameters, environment variables, and command-line options, see the [CLI Reference]({{%relref "reference/cli-reference" %}}) page.
For a complete reference of all CLI parameters, environment variables, and command-line options, see the [CLI Reference]({{%relref "docs/reference/cli-reference" %}}) page.
You can control LocalAI with command line arguments to specify a binding address, number of threads, model paths, and many other options. Any command line parameter can be specified via an environment variable.
@@ -274,17 +282,20 @@ A list of the environment variable that tweaks parallelism is the following:
### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
### This actually controls whether a backend can process multiple requests or not.
# PYTHON_GRPC_MAX_WORKERS=1
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1
### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true
```
Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle. For python-based backends (like vLLM) you can set `PYTHON_GRPC_MAX_WORKERS` to the number of parallel requests.
### VRAM and Memory Management
For detailed information on managing VRAM when running multiple models, see the dedicated [VRAM and Memory Management]({{%relref "advanced/vram-management" %}}) page.
For detailed information on managing VRAM when running multiple models, see the dedicated [VRAM and Memory Management]({{%relref "docs/advanced/vram-management" %}}) page.
### Disable CPU flagset auto detection in llama.cpp

View File

@@ -5,9 +5,9 @@ title = "Fine-tuning LLMs for text generation"
weight = 22
+++
{{% notice note %}}
{{% alert note %}}
Section under construction
{{% /notice %}}
{{% /alert %}}
This section covers how to fine-tune a language model for text generation and consume it in LocalAI.
@@ -74,10 +74,12 @@ Prepare a dataset, and upload it to your Google Drive in case you are using the
### Install dependencies
```bash
# Install axolotl and dependencies
git clone https://github.com/OpenAccess-AI-Collective/axolotl && pushd axolotl && git checkout 797f3dd1de8fd8c0eafbd1c9fdb172abd9ff840a && popd #0.3.0
pip install packaging
pushd axolotl && pip install -e '.[flash-attn,deepspeed]' && popd
# https://github.com/oobabooga/text-generation-webui/issues/4238
pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
```
@@ -94,16 +96,19 @@ We will need to configure axolotl. In this example is provided a file to use `ax
If you have a big dataset, you can pre-tokenize it to speedup the fine-tuning process:
```bash
# Optional pre-tokenize (run only if big dataset)
python -m axolotl.cli.preprocess axolotl.yaml
```
Now we are ready to start the fine-tuning process:
```bash
# Fine-tune
accelerate launch -m axolotl.cli.train axolotl.yaml
```
After we have finished the fine-tuning, we merge the Lora base with the model:
```bash
# Merge lora
python3 -m axolotl.cli.merge_lora axolotl.yaml --lora_model_dir="./qlora-out" --load_in_8bit=False --load_in_4bit=False
```
@@ -111,11 +116,17 @@ And we convert it to the gguf format that LocalAI can consume:
```bash
# Convert to gguf
git clone https://github.com/ggerganov/llama.cpp.git
pushd llama.cpp && cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release && popd
# We need to convert the pytorch model into ggml for quantization
# It creates 'ggml-model-f16.bin' in the 'merged' directory.
pushd llama.cpp && python3 convert_hf_to_gguf.py ../qlora-out/merged && popd
# Start off by making a basic q4_0 4-bit quantization.
# It's important to have 'ggml' in the name of the quant for some
# software to recognize its file format.
pushd llama.cpp/build/bin && ./llama-quantize ../../../qlora-out/merged/Merged-33B-F16.gguf \
../../../custom-model-q4_0.gguf q4_0

View File

@@ -0,0 +1,52 @@
+++
disableToc = false
title = "Installer options"
weight = 24
+++
An installation script is available for quick and hassle-free installations, streamlining the setup process for new users.
It can be used with the following command:
```bash
curl https://localai.io/install.sh | sh
```
The installation can be configured with environment variables, for example:
```bash
curl https://localai.io/install.sh | VAR=value sh
```
List of the Environment Variables:
| Environment Variable | Description |
|----------------------|--------------------------------------------------------------|
| **DOCKER_INSTALL** | Set to "true" to enable the installation of Docker images. |
| **USE_AIO** | Set to "true" to use the all-in-one LocalAI Docker image. |
| **USE_VULKAN** | Set to "true" to use Vulkan GPU support. |
| **API_KEY** | Specify an API key for accessing LocalAI, if required. |
| **PORT** | Specifies the port on which LocalAI will run (default is 8080). |
| **THREADS** | Number of processor threads the application should use. Defaults to the number of logical cores minus one. |
| **VERSION** | Specifies the version of LocalAI to install. Defaults to the latest available version. |
| **MODELS_PATH** | Directory path where LocalAI models are stored (default is /usr/share/local-ai/models). |
| **P2P_TOKEN** | Token to use for the federation or for starting workers; see the [documentation]({{%relref "docs/features/distributed_inferencing" %}}) |
| **WORKER** | Set to "true" to make the instance a worker (a p2p token is required; see the [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
| **FEDERATED** | Set to "true" to share the instance with the federation (a p2p token is required; see the [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
| **FEDERATED_SERVER** | Set to "true" to run the instance as a federation server which forwards requests to the federation (a p2p token is required; see the [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
## Image Selection
The installer will automatically detect your GPU and select the appropriate image. By default, it uses the standard images without extra Python dependencies. You can customize the image selection using the following environment variables:
- `USE_AIO=true`: Use all-in-one images that include all dependencies
- `USE_VULKAN=true`: Use Vulkan GPU support instead of vendor-specific GPU support
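As an illustrative example (combining variables documented above; adjust to your setup), an all-in-one Docker-based installation could be requested like this:
```bash
# Illustrative: combine documented installer variables in a single invocation.
# DOCKER_INSTALL pulls the Docker image, USE_AIO selects the all-in-one variant.
curl https://localai.io/install.sh | DOCKER_INSTALL=true USE_AIO=true sh
```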
## Uninstallation
To uninstall, run:
```bash
curl https://localai.io/install.sh | sh -s -- --uninstall
```
We are looking into improving the installer, and as this is a first iteration any feedback is welcome! Open up an [issue](https://github.com/mudler/LocalAI/issues/new/choose) if something doesn't work for you!

View File

@@ -498,7 +498,7 @@ feature_flags:
## Related Documentation
- See [Advanced Usage]({{%relref "advanced/advanced-usage" %}}) for other configuration options
- See [Prompt Templates]({{%relref "advanced/advanced-usage#prompt-templates" %}}) for template examples
- See [CLI Reference]({{%relref "reference/cli-reference" %}}) for command-line options
- See [Advanced Usage]({{%relref "docs/advanced/advanced-usage" %}}) for other configuration options
- See [Prompt Templates]({{%relref "docs/advanced/advanced-usage#prompt-templates" %}}) for template examples
- See [CLI Reference]({{%relref "docs/reference/cli-reference" %}}) for command-line options

View File

@@ -0,0 +1,344 @@
+++
disableToc = false
title = "Performance Tuning"
weight = 22
icon = "speed"
description = "Optimize LocalAI for maximum performance"
+++
This guide covers techniques to optimize LocalAI performance for your specific hardware and use case.
## Performance Metrics
Before optimizing, establish baseline metrics:
- **Tokens per second**: Measure inference speed
- **Memory usage**: Monitor RAM and VRAM
- **Latency**: Time to first token and total response time
- **Throughput**: Requests per second
Enable debug mode to see performance stats:
```bash
DEBUG=true local-ai
```
Look for output like:
```
llm_load_tensors: tok/s: 45.23
```
## CPU Optimization
### Thread Configuration
Match threads to CPU cores:
```yaml
# Model configuration
threads: 4 # For 4-core CPU
```
**Guidelines**:
- Use number of physical cores (not hyperthreads)
- Leave 1-2 cores for system
- Too many threads can hurt performance
### CPU Instructions
Enable appropriate CPU instructions:
```bash
# Check available instructions
cat /proc/cpuinfo | grep flags
# Build with optimizations
CMAKE_ARGS="-DGGML_AVX2=ON -DGGML_AVX512=ON" make build
```
### NUMA Optimization
For multi-socket systems:
```yaml
numa: true
```
### Memory Mapping
Enable memory mapping for faster model loading:
```yaml
mmap: true
mmlock: false # Set to true to lock in memory (faster but uses more RAM)
```
## GPU Optimization
### Layer Offloading
Offload as many layers as GPU memory allows:
```yaml
gpu_layers: 35 # Adjust based on GPU memory
f16: true # Use FP16 for better performance
```
**Finding optimal layers**:
1. Start with 20 layers
2. Monitor GPU memory: `nvidia-smi` or `rocm-smi`
3. Gradually increase until near memory limit
4. For maximum performance, offload all layers if possible
### Batch Processing
GPU excels at batch processing. Process multiple requests together when possible.
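As a rough sketch (endpoint and model name are illustrative), several requests can be fired concurrently from the shell so the backend can serve them together:
```bash
# Illustrative: issue four concurrent requests; replace "gpt-4" with a model you have installed
for i in 1 2 3 4; do
  curl -s http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}' &
done
wait
```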
### Mixed Precision
Use FP16 when supported:
```yaml
f16: true
```
## Model Optimization
### Quantization
Choose appropriate quantization:
| Quantization | Speed | Quality | Memory | Use Case |
|-------------|-------|---------|--------|----------|
| Q8_0 | Slowest | Highest | Most | Maximum quality |
| Q6_K | Slow | Very High | High | High quality |
| Q4_K_M | Medium | High | Medium | **Recommended** |
| Q4_K_S | Fast | Medium | Low | Balanced |
| Q2_K | Fastest | Lower | Least | Speed priority |
### Context Size
Reduce context size for faster inference:
```yaml
context_size: 2048 # Instead of 4096 or 8192
```
**Trade-off**: Smaller context = faster but less conversation history
### Model Selection
Choose models appropriate for your hardware:
- **Small systems (4GB RAM)**: 1-3B parameter models
- **Medium systems (8-16GB RAM)**: 3-7B parameter models
- **Large systems (32GB+ RAM)**: 7B+ parameter models
## Configuration Optimizations
### Sampling Parameters
Optimize sampling for speed:
```yaml
parameters:
temperature: 0.7
top_p: 0.9
top_k: 40
mirostat: 0 # Disable for speed (enabled by default)
```
**Note**: Disabling mirostat improves speed but may reduce quality.
### Prompt Caching
Enable prompt caching for repeated queries:
```yaml
prompt_cache_path: "cache"
prompt_cache_all: true
```
### Parallel Requests
LocalAI supports parallel requests. Configure appropriately:
```yaml
# In model config
parallel_requests: 4 # Adjust based on hardware
```
## Storage Optimization
### Use SSD
Always use SSD for model storage:
- HDD: Very slow model loading
- SSD: Fast loading, better performance
### Disable MMAP on HDD
If stuck with HDD:
```yaml
mmap: false # Loads entire model into RAM
```
### Model Location
Store models on fastest storage:
- Local SSD: Best performance
- Network storage: Slower, but allows sharing
- External drive: Slowest
## System-Level Optimizations
### Process Priority
Increase process priority (Linux):
```bash
nice -n -10 local-ai
```
### CPU Governor
Set CPU to performance mode (Linux):
```bash
# Check current governor
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
# Set to performance
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
```
### Disable Swapping
Prevent swapping for better performance:
```bash
# Linux
sudo swapoff -a
# Or set swappiness to 0
echo 0 | sudo tee /proc/sys/vm/swappiness
```
### Memory Allocation
For large models, consider huge pages (Linux):
```bash
# Allocate huge pages
echo 1024 | sudo tee /proc/sys/vm/nr_hugepages
```
## Benchmarking
### Measure Performance
Create a benchmark script:
```python
import time
import requests
start = time.time()
response = requests.post(
"http://localhost:8080/v1/chat/completions",
json={
"model": "gpt-4",
"messages": [{"role": "user", "content": "Hello"}]
}
)
elapsed = time.time() - start
tokens = response.json()["usage"]["completion_tokens"]
tokens_per_second = tokens / elapsed
print(f"Time: {elapsed:.2f}s")
print(f"Tokens: {tokens}")
print(f"Speed: {tokens_per_second:.2f} tok/s")
```
### Compare Configurations
Test different configurations:
1. Baseline: Default settings
2. Optimized: Your optimizations
3. Measure: Tokens/second, latency, memory
### Load Testing
Test under load:
```bash
# Use Apache Bench or similar
ab -n 100 -c 10 -p request.json -T application/json \
http://localhost:8080/v1/chat/completions
```
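The `request.json` payload referenced above is not shown here; a minimal example (model name is illustrative) could be created like this:
```bash
# Illustrative payload for the ab command above
cat > request.json <<'EOF'
{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}
EOF
```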
## Platform-Specific Tips
### Apple Silicon
- Metal acceleration is automatic
- Use native builds (not Docker) for best performance
- M1/M2/M3 have unified memory - optimize accordingly
### NVIDIA GPUs
- Use CUDA 12 for latest optimizations
- Enable Tensor Cores with appropriate precision
- Monitor with `nvidia-smi` for bottlenecks
### AMD GPUs
- Use ROCm/HIPBLAS backend
- Check ROCm compatibility
- Monitor with `rocm-smi`
### Intel GPUs
- Use oneAPI/SYCL backend
- Check Intel GPU compatibility
- Optimize for F16/F32 precision
## Common Performance Issues
### Slow First Response
**Cause**: Model loading
**Solution**: Pre-load models or use model warming
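As a sketch, a throwaway request sent once after startup keeps the first real request from paying the loading cost (model name is illustrative):
```bash
# Illustrative warm-up request issued right after LocalAI starts
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "warm up"}], "max_tokens": 1}'
```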
### Degrading Performance
**Cause**: Memory fragmentation
**Solution**: Restart LocalAI periodically
### Inconsistent Speed
**Cause**: System load, thermal throttling
**Solution**: Monitor system resources, ensure cooling
## Performance Checklist
- [ ] Threads match CPU cores
- [ ] GPU layers optimized
- [ ] Appropriate quantization selected
- [ ] Context size optimized
- [ ] Models on SSD
- [ ] MMAP enabled (if using SSD)
- [ ] Mirostat disabled (if speed priority)
- [ ] System resources monitored
- [ ] Baseline metrics established
- [ ] Optimizations tested and verified
## See Also
- [GPU Acceleration]({{% relref "docs/features/gpu-acceleration" %}}) - GPU setup
- [VRAM Management]({{% relref "docs/advanced/vram-management" %}}) - GPU memory
- [Model Configuration]({{% relref "docs/advanced/model-configuration" %}}) - Configuration options
- [Troubleshooting]({{% relref "docs/troubleshooting" %}}) - Performance issues

View File

@@ -23,8 +23,10 @@ The simplest approach is to ensure only one model is loaded at a time. When a ne
### Configuration
```bash
# Via command line
./local-ai --single-active-backend
# Via environment variable
LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai
```
@@ -37,10 +39,13 @@ LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai
### Example
```bash
# Start LocalAI with single active backend
LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai
# First request loads model A
curl http://localhost:8080/v1/chat/completions -d '{"model": "model-a", ...}'
# Second request automatically unloads model A and loads model B
curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}'
```
@@ -48,42 +53,40 @@ curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}'
For more flexible memory management, LocalAI provides watchdog mechanisms that automatically unload models based on their activity state. This allows multiple models to be loaded simultaneously, but automatically frees memory when models become inactive or stuck.
> **Note:** Watchdog settings can be configured via the [Runtime Settings]({{%relref "features/runtime-settings#watchdog-settings" %}}) web interface, which allows you to adjust settings without restarting the application.
### Idle Watchdog
The idle watchdog monitors models that haven't been used for a specified period and automatically unloads them to free VRAM.
#### Configuration
Via environment variables or CLI:
```bash
# Enable idle watchdog with default timeout (15 minutes)
LOCALAI_WATCHDOG_IDLE=true ./local-ai
# Customize the idle timeout (e.g., 10 minutes)
LOCALAI_WATCHDOG_IDLE=true LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m ./local-ai
# Via command line
./local-ai --enable-watchdog-idle --watchdog-idle-timeout=10m
```
Via web UI: Navigate to Settings → Watchdog Settings and enable "Watchdog Idle Enabled" with your desired timeout.
### Busy Watchdog
The busy watchdog monitors models that have been processing requests for an unusually long time and terminates them if they exceed a threshold. This is useful for detecting and recovering from stuck or hung backends.
#### Configuration
Via environment variables or CLI:
```bash
# Enable busy watchdog with default timeout (5 minutes)
LOCALAI_WATCHDOG_BUSY=true ./local-ai
# Customize the busy timeout (e.g., 10 minutes)
LOCALAI_WATCHDOG_BUSY=true LOCALAI_WATCHDOG_BUSY_TIMEOUT=10m ./local-ai
# Via command line
./local-ai --enable-watchdog-busy --watchdog-busy-timeout=10m
```
Via web UI: Navigate to Settings → Watchdog Settings and enable "Watchdog Busy Enabled" with your desired timeout.
### Combined Configuration
You can enable both watchdogs simultaneously for comprehensive memory management:
@@ -114,15 +117,19 @@ Or using command line flags:
### Example
```bash
# Start LocalAI with both watchdogs enabled
LOCALAI_WATCHDOG_IDLE=true \
LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m \
LOCALAI_WATCHDOG_BUSY=true \
LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m \
./local-ai
# Load multiple models
curl http://localhost:8080/v1/chat/completions -d '{"model": "model-a", ...}'
curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}'
# After 10 minutes of inactivity, model-a will be automatically unloaded
# If a model gets stuck processing for more than 5 minutes, it will be terminated
```
### Timeout Format
@@ -147,6 +154,7 @@ LocalAI cannot reliably estimate VRAM usage of new models to load across differe
If automatic management doesn't meet your needs, you can manually stop models using the LocalAI management API:
```bash
# Stop a specific model
curl -X POST http://localhost:8080/backend/shutdown \
-H "Content-Type: application/json" \
-d '{"model": "model-name"}'
@@ -164,7 +172,7 @@ To stop all models, you'll need to call the endpoint for each loaded model indiv
## Related Documentation
- See [Advanced Usage]({{%relref "advanced/advanced-usage" %}}) for other configuration options
- See [GPU Acceleration]({{%relref "features/GPU-acceleration" %}}) for GPU setup and configuration
- See [Backend Flags]({{%relref "advanced/advanced-usage#backend-flags" %}}) for all available backend configuration options
- See [Advanced Usage]({{%relref "docs/advanced/advanced-usage" %}}) for other configuration options
- See [GPU Acceleration]({{%relref "docs/features/GPU-acceleration" %}}) for GPU setup and configuration
- See [Backend Flags]({{%relref "docs/advanced/advanced-usage#backend-flags" %}}) for all available backend configuration options

215
docs/content/docs/faq.md Normal file
View File

@@ -0,0 +1,215 @@
+++
disableToc = false
title = "FAQ"
weight = 24
icon = "quiz"
url = "/faq/"
+++
## Frequently asked questions
Here are answers to some of the most common questions.
### How do I get models?
There are several ways to get models for LocalAI:
1. **WebUI Import** (Easiest): Use the WebUI's model import interface:
- Open `http://localhost:8080` and navigate to the Models tab
- Click "Import Model" or "New Model"
- Enter a model URI (Hugging Face, OCI, file path, etc.)
- Configure preferences in Simple Mode or edit YAML in Advanced Mode
- The WebUI provides syntax highlighting, validation, and a user-friendly interface
2. **Model Gallery** (Recommended): Use the built-in model gallery accessible via:
- WebUI: Navigate to the Models tab in the LocalAI interface and browse available models
- CLI: `local-ai models list` to see available models, then `local-ai models install <model-name>`
- Online: Browse models at [models.localai.io](https://models.localai.io)
3. **Hugging Face**: Most GGUF-based models from Hugging Face work with LocalAI. You can install them via:
- WebUI: Import using `huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`
- CLI: `local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`
4. **Manual Installation**: Download model files and place them in your models directory. See [Install and Run Models]({{% relref "docs/getting-started/models" %}}) for details.
5. **OCI Registries**: Install models from OCI-compatible registries:
- WebUI: Import using `ollama://gemma:2b` or `oci://localai/phi-2:latest`
- CLI: `local-ai run ollama://gemma:2b` or `local-ai run oci://localai/phi-2:latest`
**Security Note**: Be cautious when downloading models from the internet. Always verify the source and use trusted repositories when possible.
### Where are models stored?
LocalAI stores downloaded models in the following locations by default:
- **Command line**: `./models` (relative to current working directory)
- **Docker**: `/models` (inside the container, typically mounted to `./models` on host)
- **Launcher application**: `~/.localai/models` (in your home directory)
You can customize the model storage location using the `LOCALAI_MODELS_PATH` environment variable or `--models-path` command line flag. This is useful if you want to store models outside your home directory for backup purposes or to avoid filling up your home directory with large model files.
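For example (the path is illustrative):
```bash
# Store models on a dedicated drive instead of the default location
LOCALAI_MODELS_PATH=/data/localai/models local-ai
# or, equivalently, via the CLI flag
local-ai --models-path /data/localai/models
```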
### How much storage space do models require?
Model sizes vary significantly depending on the model and quantization level:
- **Small models (1-3B parameters)**: 1-3 GB
- **Medium models (7-13B parameters)**: 4-8 GB
- **Large models (30B+ parameters)**: 15-30+ GB
**Quantization levels** (smaller files, slightly reduced quality):
- `Q4_K_M`: ~75% of original size
- `Q4_K_S`: ~60% of original size
- `Q2_K`: ~50% of original size
**Storage recommendations**:
- Ensure you have at least 2-3x the model size available for downloads and temporary files
- Use SSD storage for better performance
- Consider the model size relative to your system RAM - models larger than your RAM may not run efficiently
### Benchmarking LocalAI and llama.cpp shows different results!
LocalAI applies a set of defaults when loading models with the llama.cpp backend, one of these is mirostat sampling - while it achieves better results, it slows down the inference. You can disable this by setting `mirostat: 0` in the model config file. See also the advanced section ({{%relref "docs/advanced/advanced-usage" %}}) for more information and [this issue](https://github.com/mudler/LocalAI/issues/2780).
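A minimal sketch of such an override in the model config file (the model name is illustrative):
```yaml
name: my-model
parameters:
  # Disable mirostat sampling to match plain llama.cpp defaults
  mirostat: 0
```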
### What's the difference with Serge, or XXX?
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp): it handles all of them internally for faster inference, is easy to set up locally, and can be deployed to Kubernetes.
### Everything is slow, how is it possible?
There are a few situations where this could occur. Some tips are:
- Don't use an HDD to store your models. Prefer SSD over HDD. If you are stuck with an HDD, disable `mmap` in the model config file so the model is loaded entirely into memory.
- Watch out for CPU overbooking. Ideally `--threads` should match the number of physical cores. For instance, if your CPU has 4 cores, allocate `<= 4` threads to a model.
- Run LocalAI with `DEBUG=true`. This gives more information, including stats on the token inference speed.
- Check that you are actually getting an output: run a simple curl request with `"stream": true` to see how fast the model is responding (see the example below).
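A minimal streaming check, with an illustrative model name:
```bash
# Illustrative streaming request to verify the model is producing tokens
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}], "stream": true}'
```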
### Can I use it with a Discord bot, or XXX?
Yes! If the client uses OpenAI and supports setting a different base URL for requests, you can point it at the LocalAI endpoint. This lets you use LocalAI with any application that was built for OpenAI, without changing the application!
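For instance, with the official OpenAI Python client it is just a matter of pointing `base_url` at LocalAI (a sketch; endpoint, key, and model name are illustrative):
```python
from openai import OpenAI

# Point any OpenAI-compatible client at the LocalAI endpoint
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="gpt-4",  # illustrative model name
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```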
### Can this leverage GPUs?
There is GPU support, see {{%relref "docs/features/GPU-acceleration" %}}.
### Where is the webUI?
LocalAI includes a built-in WebUI that is automatically available when you start LocalAI. Simply navigate to `http://localhost:8080` in your web browser after starting LocalAI.
The WebUI provides:
- Chat interface for interacting with models
- Model gallery browser and installer
- Backend management
- Configuration tools
If you prefer a different interface, LocalAI is compatible with any OpenAI-compatible UI. You can find examples in the [LocalAI-examples repository](https://github.com/mudler/LocalAI-examples), including integrations with popular UIs like chatbot-ui.
### Does it work with AutoGPT?
Yes, see the [examples](https://github.com/mudler/LocalAI-examples)!
### How can I troubleshoot when something is wrong?
Enable the debug mode by setting `DEBUG=true` in the environment variables. This will give you more information on what's going on.
You can also specify `--debug` in the command line.
### I'm getting 'invalid pitch' error when running with CUDA, what's wrong?
This typically happens when your prompt exceeds the context size. Try to reduce the prompt size, or increase the context size.
### I'm getting a 'SIGILL' error, what's wrong?
Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make build`
Alternatively, you can use the backend management system to install a compatible backend for your CPU architecture. See [Backend Management]({{% relref "docs/features/backends" %}}) for more information.
### How do I install backends?
LocalAI now uses a backend management system where backends are automatically downloaded when needed. You can also manually install backends:
```bash
# List available backends
local-ai backends list
# Install a specific backend
local-ai backends install llama-cpp
# Install a backend for a specific GPU type
local-ai backends install llama-cpp --gpu-type nvidia
```
For more details, see the [Backends documentation]({{% relref "docs/features/backends" %}}).
### How do I set up API keys for security?
You can secure your LocalAI instance by setting API keys using the `API_KEY` environment variable:
```bash
# Single API key
API_KEY=your-secret-key local-ai
# Multiple API keys (comma-separated)
API_KEY=key1,key2,key3 local-ai
```
When API keys are set, all requests must include the key in the `Authorization` header:
```bash
curl http://localhost:8080/v1/models \
-H "Authorization: Bearer your-secret-key"
```
**Important**: API keys provide full access to all LocalAI features (admin-level access). Make sure to protect your API keys and use HTTPS when exposing LocalAI remotely.
### My model is not loading or showing errors
Here are common issues and solutions:
1. **Backend not installed**: The required backend may not be installed. Check with `local-ai backends list` and install if needed.
2. **Insufficient memory**: Large models require significant RAM. Check available memory and consider using a smaller quantized model.
3. **Wrong backend specified**: Ensure the backend in your model configuration matches the model type. See the [Compatibility Table]({{% relref "docs/reference/compatibility-table" %}}).
4. **Model file corruption**: Re-download the model file.
5. **Check logs**: Enable debug mode (`DEBUG=true`) to see detailed error messages.
For more troubleshooting help, see the [Troubleshooting Guide]({{% relref "docs/troubleshooting" %}}).
### How do I use GPU acceleration?
LocalAI supports multiple GPU types:
- **NVIDIA (CUDA)**: Use `--gpus all` with Docker and CUDA-enabled images
- **AMD (ROCm)**: Use images with `hipblas` tag
- **Intel**: Use images with `intel` tag or Intel oneAPI
- **Apple Silicon (Metal)**: Automatically detected on macOS
For detailed setup instructions, see [GPU Acceleration]({{% relref "docs/features/gpu-acceleration" %}}).
### Can I use LocalAI with LangChain, AutoGPT, or other frameworks?
Yes! LocalAI is compatible with any framework that supports OpenAI's API. Simply point the framework to your LocalAI endpoint:
```python
# Example with LangChain
from langchain.llms import OpenAI
llm = OpenAI(
openai_api_key="not-needed",
openai_api_base="http://localhost:8080/v1"
)
```
See the [Integrations]({{% relref "docs/integrations" %}}) page for a list of compatible projects and examples.
### What's the difference between AIO images and standard images?
**AIO (All-in-One) images** come pre-configured with:
- Pre-installed models ready to use
- All necessary backends included
- Quick start with no configuration needed
**Standard images** are:
- Smaller in size
- No pre-installed models
- You install models and backends as needed
- More flexible for custom setups
Choose AIO images for quick testing and standard images for production deployments. See [Container Images]({{% relref "docs/getting-started/container-images" %}}) for details.
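For example (tags are illustrative; pick the variant that matches your hardware):
```bash
# All-in-one CPU image: models and backends preconfigured
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu

# Standard image: smaller, install models and backends as needed
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest
```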

View File

@@ -5,15 +5,15 @@ weight = 9
url = "/features/gpu-acceleration/"
+++
{{% notice context="warning" %}}
{{% alert context="warning" %}}
Section under construction
{{% /notice %}}
{{% /alert %}}
This section contains instruction on how to use LocalAI with GPU acceleration.
{{% notice icon="⚡" context="warning" %}}
For acceleration for AMD or Metal HW is still in development, for additional details see the [build]({{%relref "installation/build#Acceleration" %}})
{{% /notice %}}
{{% alert icon="⚡" context="warning" %}}
Acceleration for AMD or Metal HW is still in development; for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
{{% /alert %}}
## Automatic Backend Detection
@@ -32,6 +32,7 @@ Depending on the model architecture and backend used, there might be different w
```yaml
name: my-model-name
# Default model parameters
parameters:
# Relative to the models path
model: llama.cpp-model.ggmlv3.q5_K_M.bin
@@ -123,7 +124,7 @@ llama_init_from_file: kv self size = 512.00 MB
There are a limited number of tested configurations for ROCm systems; however, most newer dedicated consumer-grade GPU devices seem to be supported under the current ROCm6 implementation.
Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "installation/build#Acceleration" %}}) documentation.
Due to the nature of ROCm, it is best to run all implementations in containers, as this limits the number of packages required for installation on the host system. Compatibility and package versions for dependencies across all variations of OS must be tested independently; if desired, please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
### Requirements
@@ -180,6 +181,7 @@ The devices in the following list have been tested with `hipblas` images running
The following are examples of the ROCm specific configuration elements required.
```yaml
# docker-compose.yaml
# For full functionality select a non-'core' image, version locking the image is recommended for debug purposes.
image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
environment:

Some files were not shown because too many files have changed in this diff